From ff84469a4575f1ef8a0010046fde52240a312cae Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Mon, 18 Apr 2016 12:29:09 -0500 Subject: [PATCH 01/14] Applied various compilation fixes to bgq kernels. --- kernels/bgq/1/bli_axpyv_opt_var1.c | 2 +- kernels/bgq/1/bli_dotv_opt_var1.c | 2 +- kernels/bgq/1f/bli_axpyf_opt_var1.c | 4 +- kernels/bgq/3/bli_gemm_int_8x8.c | 60 ++++++++++++++--------------- 4 files changed, 33 insertions(+), 35 deletions(-) diff --git a/kernels/bgq/1/bli_axpyv_opt_var1.c b/kernels/bgq/1/bli_axpyv_opt_var1.c index b6131e5ee..33cd4dc0a 100644 --- a/kernels/bgq/1/bli_axpyv_opt_var1.c +++ b/kernels/bgq/1/bli_axpyv_opt_var1.c @@ -54,7 +54,7 @@ void bli_daxpyv_opt_var1 } // Call the reference implementation if needed. if ( use_ref == TRUE ) { - BLIS_DAXPYV_KERNEL_REF( conjx, n, alpha, x, incx, y, incy ); + BLIS_DAXPYV_KERNEL_REF( conjx, n, alpha, x, incx, y, incy, cntx ); return; } diff --git a/kernels/bgq/1/bli_dotv_opt_var1.c b/kernels/bgq/1/bli_dotv_opt_var1.c index b54b1176f..674b1e653 100644 --- a/kernels/bgq/1/bli_dotv_opt_var1.c +++ b/kernels/bgq/1/bli_dotv_opt_var1.c @@ -58,7 +58,7 @@ void bli_ddotv_opt_var1 use_ref = TRUE; // Call the reference implementation if needed. if ( use_ref ) { - BLIS_DDOTV_KERNEL_REF( conjx, conjy, n, x, incx, y, incy, rho ); + BLIS_DDOTV_KERNEL_REF( conjx, conjy, n, x, incx, y, incy, rho, cntx ); return; } diff --git a/kernels/bgq/1f/bli_axpyf_opt_var1.c b/kernels/bgq/1f/bli_axpyf_opt_var1.c index 5bcae61c7..ceff0f32c 100644 --- a/kernels/bgq/1f/bli_axpyf_opt_var1.c +++ b/kernels/bgq/1f/bli_axpyf_opt_var1.c @@ -56,14 +56,14 @@ void bli_daxpyf_opt_var1 // printf("%d\t%d\t%d\t%d\t%d\t%d\t%d\n", b_n, fusefac, inca, incx, incy, bli_is_unaligned_to(a, 32), bli_is_unaligned_to( y, 32)); // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. 
- if ( b_n < fusefac) || inca != 1 || incx != 1 || incy != 1 || bli_is_unaligned_to( a, 32 ) || bli_is_unaligned_to( y, 32 ) ) + if ( ( b_n < fusefac) || inca != 1 || incx != 1 || incy != 1 || bli_is_unaligned_to( a, 32 ) || bli_is_unaligned_to( y, 32 ) ) use_ref = TRUE; // Call the reference implementation if needed. if ( use_ref == TRUE ) { // printf("%d\t%d\t%d\t%d\t%d\t%d\n", fusefac, inca, incx, incy, bli_is_unaligned_to(a, 32), bli_is_unaligned_to( y, 32)); // printf("DEFAULTING TO REFERENCE IMPLEMENTATION\n"); - BLIS_DAXPYF_KERNEL_REF( conja, conjx, m, b_n, alpha, a, inca, lda, x, incx, y, incy ); + BLIS_DAXPYF_KERNEL_REF( conja, conjx, m, b_n, alpha, a, inca, lda, x, incx, y, incy, cntx ); return; } diff --git a/kernels/bgq/3/bli_gemm_int_8x8.c b/kernels/bgq/3/bli_gemm_int_8x8.c index 363155738..3aaa7d24c 100644 --- a/kernels/bgq/3/bli_gemm_int_8x8.c +++ b/kernels/bgq/3/bli_gemm_int_8x8.c @@ -215,20 +215,18 @@ void printvec(vector4double v) void bli_zgemm_int_8x8 ( dim_t k, - scomplex* restrict alpha, - scomplex* restrict a, - scomplex* restrict b, - scomplex* restrict beta, - scomplex* restrict c, inc_t rs_c, inc_t cs_c, + dcomplex* restrict alpha, + dcomplex* restrict a, + dcomplex* restrict b, + dcomplex* restrict beta, + dcomplex* restrict c, inc_t rs_c, inc_t cs_c, auxinfo_t* restrict data, cntx_t* restrict cntx ) { - double * alpha = (double*) alpha_z; - double * beta = (double*) beta_z; - double * a = (double*) a_z; - double * b = (double*) b_z; - double * c = (double*) c_z; + double* a_d = ( double* )a; + double* b_d = ( double* )b; + double* c_d = ( double* )c; //Registers for storing C. 
//2 2x4 subblocks of C, c0, and c1 @@ -259,13 +257,13 @@ void bli_zgemm_int_8x8 for( dim_t i = 0; i < k; i++ ) { - b0 = vec_ld2a( 0 * sizeof(double), &b[8*i] ); - b1 = vec_ld2a( 2 * sizeof(double), &b[8*i] ); - b2 = vec_ld2a( 4 * sizeof(double), &b[8*i] ); - b3 = vec_ld2a( 6 * sizeof(double), &b[8*i] ); + b0 = vec_ld2a( 0 * sizeof(double), &b_d[8*i] ); + b1 = vec_ld2a( 2 * sizeof(double), &b_d[8*i] ); + b2 = vec_ld2a( 4 * sizeof(double), &b_d[8*i] ); + b3 = vec_ld2a( 6 * sizeof(double), &b_d[8*i] ); - a0 = vec_lda ( 0 * sizeof(double), &a[8*i] ); - a1 = vec_lda ( 4 * sizeof(double), &a[8*i] ); + a0 = vec_lda ( 0 * sizeof(double), &a_d[8*i] ); + a1 = vec_lda ( 4 * sizeof(double), &a_d[8*i] ); c00a = vec_xmadd ( b0, a0, c00a ); c00b = vec_xxcpnmadd( a0, b0, c00b ); @@ -299,10 +297,10 @@ void bli_zgemm_int_8x8 vector4double C1 = vec_splats( 0.0 ); vector4double C2 = vec_splats( 0.0 ); - double alphar = *alpha; - double alphai = *(alpha+1); - double betar = *beta; - double betai = *(beta+1); + double alphar = bli_zreal( alpha ); + double alphai = bli_zimag( alpha ); + double betar = bli_zreal( beta ); + double betai = bli_zimag( beta ); vector4double alphav = vec_splats( 0.0 ); vector4double betav = vec_splats( 0.0 ); alphav = vec_insert( alphar, alphav, 0); @@ -359,15 +357,15 @@ void bli_zgemm_int_8x8 } - ZUPDATE( c00a, c00b, c, 0 ); - ZUPDATE( c10a, c10b, c, 4 ); - c += 2*cs_c; - ZUPDATE( c01a, c01b, c, 0 ); - ZUPDATE( c11a, c11b, c, 4 ); - c += 2*cs_c; - ZUPDATE( c02a, c02b, c, 0 ); - ZUPDATE( c12a, c12b, c, 4 ); - c += 2*cs_c; - ZUPDATE( c03a, c03b, c, 0 ); - ZUPDATE( c13a, c13b, c, 4 ); + ZUPDATE( c00a, c00b, c_d, 0 ); + ZUPDATE( c10a, c10b, c_d, 4 ); + c_d += 2*cs_c; + ZUPDATE( c01a, c01b, c_d, 0 ); + ZUPDATE( c11a, c11b, c_d, 4 ); + c_d += 2*cs_c; + ZUPDATE( c02a, c02b, c_d, 0 ); + ZUPDATE( c12a, c12b, c_d, 4 ); + c_d += 2*cs_c; + ZUPDATE( c03a, c03b, c_d, 0 ); + ZUPDATE( c13a, c13b, c_d, 4 ); } From 0e1a9821d860f6c1d818baf4c48d21a23726c132 Mon Sep 17 00:00:00 
2001 From: Devin Matthews Date: Tue, 19 Apr 2016 11:44:37 -0500 Subject: [PATCH 02/14] Add configure options and generate bli_config.h automatically. Options to configure have been added for: - Setting the internal BLIS and BLAS/CBLAS integer sizes. - Enabling and disabling the BLAS and CBLAS layers. Additionally, configure options that require defining macros (the above plus the threading model) now write those macros to the automatically generated bli_config.h file in the top-level build directory. The old bli_config.h files in the config dirs were removed, and any kernel-related macros (SIMD size and alignment etc.) were moved to bli_kernel.h. The Makefiles were also modified to find the new bli_config.h file. Lastly, support for OpenMP in clang has been added (closes #56). --- Makefile | 4 +- .../bli_config.h => build/bli_config.h.in | 45 ++++++- common.mk | 15 ++- config/armv7a/bli_config.h | 43 ------- config/armv8a/bli_config.h | 44 ------- config/armv8a/bli_kernel.h | 2 + config/bgq/bli_config.h | 45 ------- config/bgq/bli_kernel.h | 3 + config/bulldozer/bli_config.h | 43 ------- config/bulldozer/bli_kernel.h | 2 + config/carrizo/bli_config.h | 45 ------- config/carrizo/bli_kernel.h | 2 + config/cortex-a15/bli_config.h | 44 ------- config/cortex-a15/bli_kernel.h | 2 + config/cortex-a9/bli_config.h | 44 ------- config/cortex-a9/bli_kernel.h | 2 + config/dunnington/bli_config.h | 42 ------ config/dunnington/bli_kernel.h | 2 + config/emscripten/bli_config.h | 44 ------- config/emscripten/bli_kernel.h | 2 + config/haswell/bli_config.h | 41 ------ config/loongson3a/bli_config.h | 44 ------- config/loongson3a/bli_kernel.h | 2 + config/mic/bli_config.h | 50 -------- config/mic/bli_kernel.h | 10 ++ config/piledriver/bli_config.h | 43 ------- config/piledriver/bli_kernel.h | 2 + config/pnacl/bli_config.h | 44 ------- config/pnacl/bli_kernel.h | 2 + config/power7/bli_config.h | 42 ------ config/sandybridge/bli_config.h | 40 ------ config/template/bli_config.h | 41 ------ 
configure | 121 ++++++++++++++++-- frame/include/bli_config_macro_defs.h | 6 +- frame/include/bli_system.h | 9 ++ frame/include/blis.h | 10 +- testsuite/Makefile | 4 +- 37 files changed, 221 insertions(+), 765 deletions(-) rename config/reference/bli_config.h => build/bli_config.h.in (68%) delete mode 100644 config/armv7a/bli_config.h delete mode 100644 config/armv8a/bli_config.h delete mode 100644 config/bgq/bli_config.h delete mode 100644 config/bulldozer/bli_config.h delete mode 100644 config/carrizo/bli_config.h delete mode 100644 config/cortex-a15/bli_config.h delete mode 100644 config/cortex-a9/bli_config.h delete mode 100644 config/dunnington/bli_config.h delete mode 100644 config/emscripten/bli_config.h delete mode 100644 config/haswell/bli_config.h delete mode 100644 config/loongson3a/bli_config.h delete mode 100644 config/mic/bli_config.h delete mode 100644 config/piledriver/bli_config.h delete mode 100644 config/pnacl/bli_config.h delete mode 100644 config/power7/bli_config.h delete mode 100644 config/sandybridge/bli_config.h delete mode 100644 config/template/bli_config.h diff --git a/Makefile b/Makefile index fd77137a9..e52cebd57 100644 --- a/Makefile +++ b/Makefile @@ -257,7 +257,7 @@ endif # Expand the fragment paths that contain .h files to attain the set of header # files present in all fragment paths. -MK_HEADER_FILES := $(foreach frag_path, $(FRAGMENT_DIR_PATHS), \ +MK_HEADER_FILES := $(foreach frag_path, . $(FRAGMENT_DIR_PATHS), \ $(wildcard $(frag_path)/*.h)) # Strip the leading, internal, and trailing whitespace from our list of header @@ -268,7 +268,7 @@ MK_HEADER_FILES := $(strip $(MK_HEADER_FILES)) # expansion. Then, strip the header filename to leave the path to each header # location. Notice this process even weeds out duplicates! Add the config # directory manually since it contains FLA_config.h. -MK_HEADER_DIR_PATHS := $(dir $(foreach frag_path, $(FRAGMENT_DIR_PATHS), \ +MK_HEADER_DIR_PATHS := $(dir $(foreach frag_path, . 
$(FRAGMENT_DIR_PATHS), \ $(firstword $(wildcard $(frag_path)/*.h)))) # Add -I to each header path so we can specify our include search paths to the diff --git a/config/reference/bli_config.h b/build/bli_config.h.in similarity index 68% rename from config/reference/bli_config.h rename to build/bli_config.h.in index 5195e61c5..269998283 100644 --- a/config/reference/bli_config.h +++ b/build/bli_config.h.in @@ -35,7 +35,48 @@ #ifndef BLIS_CONFIG_H #define BLIS_CONFIG_H - - +#if @enable_pthreads@ +#define BLIS_ENABLE_PTHREADS #endif +#if @enable_openmp@ +#define BLIS_ENABLE_OPENMP +#endif + +#if @int_type_size@ == 64 +#define BLIS_INT_TYPE_SIZE 64 +#elif @int_type_size@ == 32 +#define BLIS_INT_TYPE_SIZE 32 +#else +// determine automatically +#endif + +#if @blas2blis_int_type_size@ == 64 +#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 64 +#elif @blas2blis_int_type_size@ == 32 +#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32 +#else +// determine automatically +#endif + +#ifndef BLIS_ENABLE_BLAS2BLIS +#ifndef BLIS_DISABLE_BLAS2BLIS +#if @enable_blas2blis@ +#define BLIS_ENABLE_BLAS2BLIS +#else +#define BLIS_DISABLE_BLAS2BLIS +#endif +#endif +#endif + +#ifndef BLIS_ENABLE_CBLAS +#ifndef BLIS_DISABLE_CBLAS +#if @enable_cblas@ +#define BLIS_ENABLE_CBLAS +#else +#define BLIS_DISABLE_CBLAS +#endif +#endif +#endif + +#endif diff --git a/common.mk b/common.mk index 7c3d4606d..dccc450cf 100644 --- a/common.mk +++ b/common.mk @@ -156,11 +156,11 @@ ifeq ($(THREADING_MODEL),auto) THREADING_MODEL := omp endif ifeq ($(THREADING_MODEL),omp) -CTHREADFLAGS := -fopenmp -DBLIS_ENABLE_OPENMP +CTHREADFLAGS := -fopenmp LDFLAGS += -fopenmp endif ifeq ($(THREADING_MODEL),pthreads) -CTHREADFLAGS := -pthread -DBLIS_ENABLE_PTHREADS +CTHREADFLAGS := -pthread LDFLAGS += -lpthread endif endif @@ -170,11 +170,11 @@ ifeq ($(THREADING_MODEL),auto) THREADING_MODEL := omp endif ifeq ($(THREADING_MODEL),omp) -CTHREADFLAGS := -openmp -DBLIS_ENABLE_OPENMP +CTHREADFLAGS := -openmp LDFLAGS += -openmp endif ifeq 
($(THREADING_MODEL),pthreads) -CTHREADFLAGS := -pthread -DBLIS_ENABLE_PTHREADS +CTHREADFLAGS := -pthread LDFLAGS += -lpthread endif endif @@ -184,10 +184,11 @@ ifeq ($(THREADING_MODEL),auto) THREADING_MODEL := pthreads endif ifeq ($(THREADING_MODEL),omp) -$(error OpenMP is not supported with Clang.) +CTHREADFLAGS := -fopenmp +LDFLAGS += -fopenmp endif ifeq ($(THREADING_MODEL),pthreads) -CTHREADFLAGS := -pthread -DBLIS_ENABLE_PTHREADS +CTHREADFLAGS := -pthread LDFLAGS += -lpthread endif endif @@ -207,10 +208,12 @@ CFLAGS_KERNELS := $(CKOPTFLAGS) $(CVECFLAGS) $(CFLAGS_NOOPT) ifeq ($(V),1) BLIS_ENABLE_VERBOSE_MAKE_OUTPUT := yes +BLIS_ENABLE_TEST_OUTPUT := yes endif ifeq ($(V),0) BLIS_ENABLE_VERBOSE_MAKE_OUTPUT := no +BLIS_ENABLE_TEST_OUTPUT := no endif diff --git a/config/armv7a/bli_config.h b/config/armv7a/bli_config.h deleted file mode 100644 index 21041fc2a..000000000 --- a/config/armv7a/bli_config.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_CONFIG_H -#define BLIS_CONFIG_H - - -#define BLIS_INT_TYPE_SIZE 32 -#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32 - - -#endif diff --git a/config/armv8a/bli_config.h b/config/armv8a/bli_config.h deleted file mode 100644 index 127a2bca7..000000000 --- a/config/armv8a/bli_config.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_CONFIG_H -#define BLIS_CONFIG_H - - -#define BLIS_INT_TYPE_SIZE 64 -#define BLIS_SIMD_ALIGN_SIZE 16 -#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32 - - -#endif diff --git a/config/armv8a/bli_kernel.h b/config/armv8a/bli_kernel.h index 38eaef60d..b9ae3aec1 100644 --- a/config/armv8a/bli_kernel.h +++ b/config/armv8a/bli_kernel.h @@ -38,6 +38,8 @@ // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- +#define BLIS_SIMD_ALIGN_SIZE 16 + // -- Cache blocksizes -- // diff --git a/config/bgq/bli_config.h b/config/bgq/bli_config.h deleted file mode 100644 index 9810b5c11..000000000 --- a/config/bgq/bli_config.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_CONFIG_H -#define BLIS_CONFIG_H - - -#undef restrict - -#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32 - - - -#endif diff --git a/config/bgq/bli_kernel.h b/config/bgq/bli_kernel.h index 6d8593515..fd8446c34 100644 --- a/config/bgq/bli_kernel.h +++ b/config/bgq/bli_kernel.h @@ -36,6 +36,9 @@ #define BLIS_KERNEL_H +#undef restrict + + // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- // -- Cache blocksizes -- diff --git a/config/bulldozer/bli_config.h b/config/bulldozer/bli_config.h deleted file mode 100644 index 1f99e7e53..000000000 --- a/config/bulldozer/bli_config.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_CONFIG_H -#define BLIS_CONFIG_H - - -#define BLIS_SIMD_ALIGN_SIZE 16 - - - -#endif diff --git a/config/bulldozer/bli_kernel.h b/config/bulldozer/bli_kernel.h index b750b6da2..d7baa59e8 100644 --- a/config/bulldozer/bli_kernel.h +++ b/config/bulldozer/bli_kernel.h @@ -38,6 +38,8 @@ // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- +#define BLIS_SIMD_ALIGN_SIZE 16 + // -- Cache blocksizes -- // diff --git a/config/carrizo/bli_config.h b/config/carrizo/bli_config.h deleted file mode 100644 index 86a584112..000000000 --- a/config/carrizo/bli_config.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_CONFIG_H -#define BLIS_CONFIG_H - - -#define BLIS_SIMD_ALIGN_SIZE 16 - - - - - -#endif diff --git a/config/carrizo/bli_kernel.h b/config/carrizo/bli_kernel.h index 241f08a81..41175e734 100644 --- a/config/carrizo/bli_kernel.h +++ b/config/carrizo/bli_kernel.h @@ -38,6 +38,8 @@ // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- +#define BLIS_SIMD_ALIGN_SIZE 16 + // -- Cache blocksizes -- // diff --git a/config/cortex-a15/bli_config.h b/config/cortex-a15/bli_config.h deleted file mode 100644 index 753712540..000000000 --- a/config/cortex-a15/bli_config.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_CONFIG_H -#define BLIS_CONFIG_H - - -#define BLIS_INT_TYPE_SIZE 32 -#define BLIS_SIMD_ALIGN_SIZE 16 -#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32 - - -#endif diff --git a/config/cortex-a15/bli_kernel.h b/config/cortex-a15/bli_kernel.h index 66960a55b..298e516c7 100644 --- a/config/cortex-a15/bli_kernel.h +++ b/config/cortex-a15/bli_kernel.h @@ -38,6 +38,8 @@ // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- +#define BLIS_SIMD_ALIGN_SIZE 16 + // -- Cache blocksizes -- // diff --git a/config/cortex-a9/bli_config.h b/config/cortex-a9/bli_config.h deleted file mode 100644 index 753712540..000000000 --- a/config/cortex-a9/bli_config.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_CONFIG_H -#define BLIS_CONFIG_H - - -#define BLIS_INT_TYPE_SIZE 32 -#define BLIS_SIMD_ALIGN_SIZE 16 -#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32 - - -#endif diff --git a/config/cortex-a9/bli_kernel.h b/config/cortex-a9/bli_kernel.h index 9a6c42bc6..1ec9f5646 100644 --- a/config/cortex-a9/bli_kernel.h +++ b/config/cortex-a9/bli_kernel.h @@ -38,6 +38,8 @@ // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- +#define BLIS_SIMD_ALIGN_SIZE 16 + // -- Cache blocksizes -- // diff --git a/config/dunnington/bli_config.h b/config/dunnington/bli_config.h deleted file mode 100644 index 64392de99..000000000 --- a/config/dunnington/bli_config.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_CONFIG_H -#define BLIS_CONFIG_H - - -#define BLIS_SIMD_ALIGN_SIZE 16 - - -#endif diff --git a/config/dunnington/bli_kernel.h b/config/dunnington/bli_kernel.h index f256bcf55..f5d846d9b 100644 --- a/config/dunnington/bli_kernel.h +++ b/config/dunnington/bli_kernel.h @@ -38,6 +38,8 @@ // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- +#define BLIS_SIMD_ALIGN_SIZE 16 + // -- Cache blocksizes -- // diff --git a/config/emscripten/bli_config.h b/config/emscripten/bli_config.h deleted file mode 100644 index 753712540..000000000 --- a/config/emscripten/bli_config.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_CONFIG_H -#define BLIS_CONFIG_H - - -#define BLIS_INT_TYPE_SIZE 32 -#define BLIS_SIMD_ALIGN_SIZE 16 -#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32 - - -#endif diff --git a/config/emscripten/bli_kernel.h b/config/emscripten/bli_kernel.h index 0064396b2..3a16a9d9f 100644 --- a/config/emscripten/bli_kernel.h +++ b/config/emscripten/bli_kernel.h @@ -39,6 +39,8 @@ // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- +#define BLIS_SIMD_ALIGN_SIZE 16 + // -- Cache blocksizes -- // diff --git a/config/haswell/bli_config.h b/config/haswell/bli_config.h deleted file mode 100644 index 89bba2b20..000000000 --- a/config/haswell/bli_config.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_CONFIG_H -#define BLIS_CONFIG_H - - - - -#endif diff --git a/config/loongson3a/bli_config.h b/config/loongson3a/bli_config.h deleted file mode 100644 index 753712540..000000000 --- a/config/loongson3a/bli_config.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_CONFIG_H -#define BLIS_CONFIG_H - - -#define BLIS_INT_TYPE_SIZE 32 -#define BLIS_SIMD_ALIGN_SIZE 16 -#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32 - - -#endif diff --git a/config/loongson3a/bli_kernel.h b/config/loongson3a/bli_kernel.h index b21a4062f..18f4d5568 100644 --- a/config/loongson3a/bli_kernel.h +++ b/config/loongson3a/bli_kernel.h @@ -38,6 +38,8 @@ // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- +#define BLIS_SIMD_ALIGN_SIZE 16 + // -- Cache blocksizes -- // diff --git a/config/mic/bli_config.h b/config/mic/bli_config.h deleted file mode 100644 index 3e18aa7b1..000000000 --- a/config/mic/bli_config.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_CONFIG_H -#define BLIS_CONFIG_H - - -#define BLIS_TREE_BARRIER -#define BLIS_TREE_BARRIER_ARITY 4 - - -#define BLIS_SIMD_ALIGN_SIZE 32 - -#define BLIS_SIMD_SIZE 64 -#define BLIS_SIMD_NUM_REGISTERS 32 - - - -#endif diff --git a/config/mic/bli_kernel.h b/config/mic/bli_kernel.h index 8667bb678..a0ac8876d 100644 --- a/config/mic/bli_kernel.h +++ b/config/mic/bli_kernel.h @@ -36,6 +36,16 @@ #define BLIS_KERNEL_H +#define BLIS_TREE_BARRIER +#define BLIS_TREE_BARRIER_ARITY 4 + + +#define BLIS_SIMD_ALIGN_SIZE 32 + +#define BLIS_SIMD_SIZE 64 +#define BLIS_SIMD_NUM_REGISTERS 32 + + // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- // -- Cache blocksizes -- diff --git a/config/piledriver/bli_config.h b/config/piledriver/bli_config.h deleted file mode 100644 index 38708a0b2..000000000 --- a/config/piledriver/bli_config.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_CONFIG_H -#define BLIS_CONFIG_H - - - -#define BLIS_SIMD_ALIGN_SIZE 16 - - -#endif diff --git a/config/piledriver/bli_kernel.h b/config/piledriver/bli_kernel.h index 64ccf3c23..d073ae15c 100644 --- a/config/piledriver/bli_kernel.h +++ b/config/piledriver/bli_kernel.h @@ -38,6 +38,8 @@ // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- +#define BLIS_SIMD_ALIGN_SIZE 16 + // -- Cache blocksizes -- // diff --git a/config/pnacl/bli_config.h b/config/pnacl/bli_config.h deleted file mode 100644 index 753712540..000000000 --- a/config/pnacl/bli_config.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#ifndef BLIS_CONFIG_H -#define BLIS_CONFIG_H - - -#define BLIS_INT_TYPE_SIZE 32 -#define BLIS_SIMD_ALIGN_SIZE 16 -#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32 - - -#endif diff --git a/config/pnacl/bli_kernel.h b/config/pnacl/bli_kernel.h index c5dbf581a..25f347192 100644 --- a/config/pnacl/bli_kernel.h +++ b/config/pnacl/bli_kernel.h @@ -44,6 +44,8 @@ // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- +#define BLIS_SIMD_ALIGN_SIZE 16 + // -- Cache blocksizes -- // diff --git a/config/power7/bli_config.h b/config/power7/bli_config.h deleted file mode 100644 index e1725b9b0..000000000 --- a/config/power7/bli_config.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_CONFIG_H -#define BLIS_CONFIG_H - - - - - -#endif diff --git a/config/sandybridge/bli_config.h b/config/sandybridge/bli_config.h deleted file mode 100644 index 5b915c737..000000000 --- a/config/sandybridge/bli_config.h +++ /dev/null @@ -1,40 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_CONFIG_H -#define BLIS_CONFIG_H - - - -#endif diff --git a/config/template/bli_config.h b/config/template/bli_config.h deleted file mode 100644 index cc8e64b5a..000000000 --- a/config/template/bli_config.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#ifndef BLIS_CONFIG_H -#define BLIS_CONFIG_H - - - -#endif - diff --git a/configure b/configure index 2e87327ea..a630b7fe5 100755 --- a/configure +++ b/configure @@ -91,14 +91,34 @@ print_usage() echo " -t MODEL, --enable-threading[=MODEL], --disable-threading" echo " " echo " Enable threading in the library, using threading model" - echo " MODEL={auto,omp,pthreads,no}. If MODEL=no or " + echo " MODEL={omp,pthreads,no}. If MODEL=no or " echo " --disable-threading is specified, threading will be" - echo " disabled. If MODEL=auto or is unspecified, a model" - echo " will be chosen automatically. The default is 'auto'." + echo " disabled. The default is 'no'." echo " " echo " -q, --quiet Suppress informational output. By default, configure" echo " is verbose. (NOTE: -q is not yet implemented)" echo " " + echo " -i SIZE, --int-size=SIZE" + echo " " + echo " Set the size (in bits) of internal BLIS integers and" + echo " integer types used in native BLIS interfaces." + echo " " + echo " -b SIZE, --blas-int-size=SIZE" + echo " " + echo " Set the size (in bits) of integer types in external" + echo " BLAS and CBLAS interfaces, if enabled." + echo " " + echo " --disable-blas, --enable-blas" + echo " " + echo " Disable (enabled by default) building the BLAS" + echo " compatibility layer." + echo " " + echo " --enable-cblas, --disable-cblas" + echo " " + echo " Enable (disabled by default) building the CBLAS" + echo " compatibility layer. 
This automatically enables the" + echo " BLAS compatibility layer as well." + echo " " echo " -h, --help Output this information and quit." echo " " echo " Environment Variables:" @@ -149,6 +169,13 @@ main() config_mk_in_path="${build_dirpath}/${config_mk_in}" config_mk_out_path="${cur_dirpath}/${config_mk_out}" + # The names/paths for the template bli_config.h.in and its instantiated + # counterpart. + bli_config_h_in='bli_config.h.in' + bli_config_h_out='bli_config.h' + bli_config_h_in_path="${build_dirpath}/${bli_config_h_in}" + bli_config_h_out_path="${cur_dirpath}/${bli_config_h_out}" + # Path to 'update-version-file.sh' script. update_version_file_sh="${build_dirpath}/update-version-file.sh" @@ -188,7 +215,7 @@ main() debug_flag='' # The threading flag. - threading_model='auto' + threading_model='no' # Option variables. quiet_flag='' @@ -197,6 +224,10 @@ main() enable_verbose='no' enable_static='yes' enable_shared='no' + int_type_size=0 + blas2blis_int_type_size=32 + enable_blas2blis='yes' + enable_cblas='no' # The path to the auto-detection script. auto_detect_sh="${build_dirpath}/auto-detect/auto-detect.sh" @@ -221,7 +252,7 @@ main() # Process our command line options. - while getopts ":hp:d:t:q-:" opt; do + while getopts ":hp:d:t:qi:b:-:" opt; do case $opt in -) case "$OPTARG" in @@ -264,15 +295,30 @@ main() disable-shared) enable_shared='no' ;; - enable-threading) - threading_model='auto' - ;; enable-threading=*) threading_model=${OPTARG#*=} ;; disable-threading) threading_model='no' ;; + int-size=*) + int_type_size=${OPTARG#*=} + ;; + blas-int-size=*) + blas2blis_int_type_size=${OPTARG#*=} + ;; + enable-blas) + enable_blas2blis='yes' + ;; + disable-blas) + enable_blas2blis='no' + ;; + enable-cblas) + enable_cblas='yes' + ;; + disable-cblas) + enable_cblas='no' + ;; *) print_usage ;; @@ -294,6 +340,12 @@ main() t) threading_model=$OPTARG ;; + i) + int_type_size=$OPTARG + ;; + b) + blas2blis_int_type_size=$OPTARG + ;; \?) 
print_usage ;; @@ -430,18 +482,58 @@ main() # Check the threading model flag. + enable_openmp=0 + enable_pthreads=0 if [ "x${threading_model}" = "xauto" ]; then echo "${script_name}: determining the threading model automatically." elif [ "x${threading_model}" = "xomp" ]; then echo "${script_name}: using OpenMP for threading." + enable_openmp=1 elif [ "x${threading_model}" = "xpthreads" ]; then echo "${script_name}: using Pthreads for threading." + enable_pthreads=1 elif [ "x${threading_model}" = "xno" ]; then echo "${script_name}: threading is disabled." else echo "Unsupported threading model: ${threading_model}." exit 1 fi + + + # Convert 'yes' and 'no' flags to booleans. + if [ "x${enable_cblas}" = "xyes" ]; then + echo "${script_name}: the CBLAS compatibility layer is enabled." + enable_cblas=1 + # Force the BLAS layer on when CBLAS is enabled; the next check converts it to 1 + enable_blas2blis='yes' + else + echo "${script_name}: the CBLAS compatibility layer is disabled." + enable_cblas=0 + fi + if [ "x${enable_blas2blis}" = "xyes" ]; then + echo "${script_name}: the BLAS compatibility layer is enabled." + enable_blas2blis=1 + else + echo "${script_name}: the BLAS compatibility layer is disabled." + enable_blas2blis=0 + fi + + + # Report integer sizes + if [ "x${int_type_size}" = "x32" ]; then + echo "${script_name}: the internal integer size is 32-bit." + elif [ "x${int_type_size}" = "x64" ]; then + echo "${script_name}: the internal integer size is 64-bit." + else + echo "${script_name}: the internal integer size is automatically determined." + fi + if [ "x${blas2blis_int_type_size}" = "x32" ]; then + echo "${script_name}: the BLAS/CBLAS interface integer size is 32-bit." + elif [ "x${blas2blis_int_type_size}" = "x64" ]; then + echo "${script_name}: the BLAS/CBLAS interface integer size is 64-bit." + else + echo "${script_name}: the BLAS/CBLAS interface integer size is automatically determined." + fi # Insert escape characters into the paths used in the sed command below. 
@@ -466,6 +558,19 @@ main() | sed "s/@enable_dynamic@/${enable_shared}/g" \ | sed "s/@threading_model@/${threading_model}/g" \ > "${config_mk_out_path}" + + + # Begin substituting information into the bli_config_h_in file, outputting + # to bli_config_h_out. + echo "${script_name}: creating ${bli_config_h_out_path} from ${bli_config_h_in_path}" + cat "${bli_config_h_in_path}" \ + | sed "s/@enable_openmp@/${enable_openmp}/g" \ + | sed "s/@enable_pthreads@/${enable_pthreads}/g" \ + | sed "s/@int_type_size@/${int_type_size}/g" \ + | sed "s/@blas2blis_int_type_size@/${blas2blis_int_type_size}/g" \ + | sed "s/@enable_blas2blis@/${enable_blas2blis}/g" \ + | sed "s/@enable_cblas@/${enable_cblas}/g" \ + > "${bli_config_h_out_path}" # Create obj sub-directories (if they do not already exist). diff --git a/frame/include/bli_config_macro_defs.h b/frame/include/bli_config_macro_defs.h index 3b4ace0c8..a5bce35c0 100644 --- a/frame/include/bli_config_macro_defs.h +++ b/frame/include/bli_config_macro_defs.h @@ -45,7 +45,11 @@ // internally within BLIS as well as those exposed in the native BLAS-like BLIS // interface. #ifndef BLIS_INT_TYPE_SIZE +#ifdef BLIS_ARCH_64 #define BLIS_INT_TYPE_SIZE 64 +#else +#define BLIS_INT_TYPE_SIZE 32 +#endif #endif @@ -177,7 +181,7 @@ // C99 type "long int". Note that this ONLY affects integers used within the // BLAS compatibility layer. 
#ifndef BLIS_BLAS2BLIS_INT_TYPE_SIZE -#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 64 +#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32 #endif diff --git a/frame/include/bli_system.h b/frame/include/bli_system.h index f00e077d8..57fe810fc 100644 --- a/frame/include/bli_system.h +++ b/frame/include/bli_system.h @@ -41,6 +41,15 @@ #include #include +// Determine if we are on a 64-bit or 32-bit architecture +#if defined(_M_X64) || defined(__x86_64) || defined(__aarch64__) || \ + defined(_ARCH_PPC64) +#define BLIS_ARCH_64 +#else +#define BLIS_ARCH_32 +#endif + +// Determine the target operating system #if defined(_WIN32) || defined(__CYGWIN__) #define BLIS_OS_WINDOWS 1 #elif defined(__APPLE__) || defined(__MACH__) diff --git a/frame/include/blis.h b/frame/include/blis.h index 9bfedd71a..af1a44d4c 100644 --- a/frame/include/blis.h +++ b/frame/include/blis.h @@ -44,6 +44,11 @@ extern "C" { #endif +// -- System headers -- + +#include "bli_system.h" + + // -- BLIS configuration definition -- // NOTE: We include bli_config.h first because there might be something @@ -57,11 +62,6 @@ extern "C" { #include "bli_config_macro_defs.h" -// -- System headers -- - -#include "bli_system.h" - - // -- Common BLIS definitions -- #include "bli_type_defs.h" diff --git a/testsuite/Makefile b/testsuite/Makefile index 60c33ca59..ff421645c 100644 --- a/testsuite/Makefile +++ b/testsuite/Makefile @@ -143,7 +143,7 @@ endif # Expand the fragment paths that contain .h files to attain the set of header # files present in all fragment paths. -MK_HEADER_FILES := $(foreach frag_path, $(FRAGMENT_DIR_PATHS), \ +MK_HEADER_FILES := $(foreach frag_path, .. $(FRAGMENT_DIR_PATHS), \ $(wildcard $(frag_path)/*.h)) # Strip the leading, internal, and trailing whitespace from our list of header @@ -154,7 +154,7 @@ MK_HEADER_FILES := $(strip $(MK_HEADER_FILES)) # expansion. Then, strip the header filename to leave the path to each header # location. Notice this process even weeds out duplicates! 
Add the config # directory manually since it contains FLA_config.h. -MK_HEADER_DIR_PATHS := $(dir $(foreach frag_path, $(FRAGMENT_DIR_PATHS), \ +MK_HEADER_DIR_PATHS := $(dir $(foreach frag_path, .. $(FRAGMENT_DIR_PATHS), \ $(firstword $(wildcard $(frag_path)/*.h)))) # Add -I to each header path so we can specify our include search paths to the From eb2f18e4844d985715df20798f50f9cc12e3b5ad Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Tue, 19 Apr 2016 12:50:32 -0500 Subject: [PATCH 03/14] More compile-time fixes to bgq gemm ukernel code. --- kernels/bgq/3/bli_gemm_int_8x8.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernels/bgq/3/bli_gemm_int_8x8.c b/kernels/bgq/3/bli_gemm_int_8x8.c index 3aaa7d24c..c58560ef4 100644 --- a/kernels/bgq/3/bli_gemm_int_8x8.c +++ b/kernels/bgq/3/bli_gemm_int_8x8.c @@ -130,8 +130,8 @@ void bli_dgemm_int_8x8 vector4double AB; vector4double C = vec_splats( 0.0 ); - vector4double betav = vec_lds( 0, beta ); - vector4double alphav = vec_lds( 0, alpha ); + vector4double betav = vec_lds( 0, ( double* )beta ); + vector4double alphav = vec_lds( 0, ( double* )alpha ); double ct; //Macro to update 4 elements of C in a column. @@ -297,10 +297,10 @@ void bli_zgemm_int_8x8 vector4double C1 = vec_splats( 0.0 ); vector4double C2 = vec_splats( 0.0 ); - double alphar = bli_zreal( alpha ); - double alphai = bli_zimag( alpha ); - double betar = bli_zreal( beta ); - double betai = bli_zimag( beta ); + double alphar = bli_zreal( *alpha ); + double alphai = bli_zimag( *alpha ); + double betar = bli_zreal( *beta ); + double betai = bli_zimag( *beta ); vector4double alphav = vec_splats( 0.0 ); vector4double betav = vec_splats( 0.0 ); alphav = vec_insert( alphar, alphav, 0); From dd0ab1d93f33abca6af9edd7b8e52da62dcfa5b1 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Wed, 20 Apr 2016 14:38:23 -0500 Subject: [PATCH 04/14] Converted some bli_cntx query functions to macros. 
Details: - Commented out several datatype-aware query functions (those ending in _dt) from bli_cntx.c, as well as their prototypes in bli_cntx.h, and added equivalent cpp query macros to bli_cntx.h. - Added 'bli_config.h' to .gitignore. --- .gitignore | 1 + frame/base/bli_cntx.c | 34 ++++---- frame/base/bli_cntx.h | 191 +++++++++++++++++++++++++++++++----------- 3 files changed, 165 insertions(+), 61 deletions(-) diff --git a/.gitignore b/.gitignore index b11c60676..4dc9073f6 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,7 @@ # -- build system files -- config.mk +bli_config.h # -- makefile fragments -- diff --git a/frame/base/bli_cntx.c b/frame/base/bli_cntx.c index 53af75ec6..c1043dfbe 100644 --- a/frame/base/bli_cntx.c +++ b/frame/base/bli_cntx.c @@ -109,6 +109,10 @@ void bli_cntx_obj_clear( cntx_t* cntx ) void bli_cntx_init( cntx_t* cntx ) { + // This function initializes a "universal" context that is pre-loaded + // with kernel addresses for all level-1v, -1f, and -3 kernels, in + // addition to all level-1f and -3 blocksizes. + bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMM_UKR, cntx ); bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMMTRSM_L_UKR, cntx ); bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMMTRSM_U_UKR, cntx ); @@ -161,6 +165,7 @@ blksz_t* bli_cntx_get_blksz( bszid_t bs_id, return blksz; } +#if 0 dim_t bli_cntx_get_blksz_def_dt( num_t dt, bszid_t bs_id, cntx_t* cntx ) @@ -182,6 +187,7 @@ dim_t bli_cntx_get_blksz_max_dt( num_t dt, // Return the default blocksize value for the datatype given. 
return bli_blksz_get_max( dt, blksz ); } +#endif blksz_t* bli_cntx_get_bmult( bszid_t bs_id, cntx_t* cntx ) @@ -196,6 +202,7 @@ blksz_t* bli_cntx_get_bmult( bszid_t bs_id, return bmult; } +#if 0 dim_t bli_cntx_get_bmult_dt( num_t dt, bszid_t bs_id, cntx_t* cntx ) @@ -203,21 +210,8 @@ dim_t bli_cntx_get_bmult_dt( num_t dt, blksz_t* bmult = bli_cntx_get_bmult( bs_id, cntx ); return bli_blksz_get_def( dt, bmult ); -#if 0 - blksz_t* blkszs = bli_cntx_blkszs_buf( cntx ); - bszid_t* bmults = bli_cntx_bmults_buf( cntx ); - bszid_t bm_id = bmults[ bs_id ]; - - // A little hack to ensure we don't try to access a blocksize object - // using an uninitialized/garbage value in the bmults array (which - // may exist because that blocksize in the context was never set). - if ( bm_id < BLIS_BSZID_LO && BLIS_BSZID_HI < bm_id ) return 0; - - blksz_t* bmult = &blkszs[ bm_id ]; - - return bli_blksz_get_def( dt, bmult ); -#endif } +#endif func_t* bli_cntx_get_l3_ukr( l3ukr_t ukr_id, cntx_t* cntx ) @@ -240,6 +234,7 @@ func_t* bli_cntx_get_l3_ukr( l3ukr_t ukr_id, return l3_ukr; } +#if 0 void* bli_cntx_get_l3_ukr_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx ) @@ -260,6 +255,7 @@ void* bli_cntx_get_l3_ukr_dt( num_t dt, return bli_func_get_dt( dt, l3_ukr ); } +#endif func_t* bli_cntx_get_l3_vir_ukr( l3ukr_t ukr_id, cntx_t* cntx ) @@ -272,6 +268,7 @@ func_t* bli_cntx_get_l3_vir_ukr( l3ukr_t ukr_id, return l3_vir_ukr; } +#if 0 void* bli_cntx_get_l3_vir_ukr_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx ) @@ -283,6 +280,7 @@ void* bli_cntx_get_l3_vir_ukr_dt( num_t dt, // identified by ukr_id. return bli_func_get_dt( dt, l3_vir_ukr ); } +#endif func_t* bli_cntx_get_l3_nat_ukr( l3ukr_t ukr_id, cntx_t* cntx ) @@ -295,6 +293,7 @@ func_t* bli_cntx_get_l3_nat_ukr( l3ukr_t ukr_id, return l3_nat_ukr; } +#if 0 void* bli_cntx_get_l3_nat_ukr_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx ) @@ -306,6 +305,7 @@ void* bli_cntx_get_l3_nat_ukr_dt( num_t dt, // identified by ukr_id. 
return bli_func_get_dt( dt, l3_nat_ukr ); } +#endif func_t* bli_cntx_get_l1f_ker( l1fkr_t ker_id, cntx_t* cntx ) @@ -318,6 +318,7 @@ func_t* bli_cntx_get_l1f_ker( l1fkr_t ker_id, return l1f_ker; } +#if 0 void* bli_cntx_get_l1f_ker_dt( num_t dt, l1fkr_t ker_id, cntx_t* cntx ) @@ -327,6 +328,7 @@ void* bli_cntx_get_l1f_ker_dt( num_t dt, return bli_func_get_dt( dt, l1f_ker ); } +#endif func_t* bli_cntx_get_l1v_ker( l1vkr_t ker_id, cntx_t* cntx ) @@ -339,6 +341,7 @@ func_t* bli_cntx_get_l1v_ker( l1vkr_t ker_id, return l1v_ker; } +#if 0 void* bli_cntx_get_l1v_ker_dt( num_t dt, l1vkr_t ker_id, cntx_t* cntx ) @@ -348,6 +351,7 @@ void* bli_cntx_get_l1v_ker_dt( num_t dt, return bli_func_get_dt( dt, l1v_ker ); } +#endif mbool_t* bli_cntx_get_l3_nat_ukr_prefs( l3ukr_t ukr_id, cntx_t* cntx ) @@ -367,6 +371,7 @@ func_t* bli_cntx_get_packm_ukr( cntx_t* cntx ) return packm_ukrs; } +#if 0 ind_t bli_cntx_get_ind_method( cntx_t* cntx ) { return bli_cntx_method( cntx ); @@ -381,6 +386,7 @@ pack_t bli_cntx_get_pack_schema_b( cntx_t* cntx ) { return bli_cntx_schema_b( cntx ); } +#endif // ----------------------------------------------------------------------------- diff --git a/frame/base/bli_cntx.h b/frame/base/bli_cntx.h index 67a0fcd96..5635ddc88 100644 --- a/frame/base/bli_cntx.h +++ b/frame/base/bli_cntx.h @@ -66,55 +66,55 @@ typedef struct cntx_s #define bli_cntx_blkszs_buf( cntx ) \ \ - ( cntx->blkszs ) + ( (cntx)->blkszs ) #define bli_cntx_bmults_buf( cntx ) \ \ - ( cntx->bmults ) + ( (cntx)->bmults ) #define bli_cntx_l3_vir_ukrs_buf( cntx ) \ \ - ( cntx->l3_vir_ukrs ) + ( (cntx)->l3_vir_ukrs ) #define bli_cntx_l3_nat_ukrs_buf( cntx ) \ \ - ( cntx->l3_nat_ukrs ) + ( (cntx)->l3_nat_ukrs ) #define bli_cntx_l3_nat_ukrs_prefs_buf( cntx ) \ \ - ( cntx->l3_nat_ukrs_prefs ) + ( (cntx)->l3_nat_ukrs_prefs ) #define bli_cntx_l1f_kers_buf( cntx ) \ \ - ( cntx->l1f_kers ) + ( (cntx)->l1f_kers ) #define bli_cntx_l1v_kers_buf( cntx ) \ \ - ( cntx->l1v_kers ) + ( (cntx)->l1v_kers ) #define 
bli_cntx_packm_ukrs_buf( cntx ) \ \ - (&(cntx->packm_ukrs) ) + (&((cntx)->packm_ukrs) ) #define bli_cntx_packm_ukrs( cntx ) \ \ - (&(cntx->packm_ukrs) ) + (&((cntx)->packm_ukrs) ) #define bli_cntx_method( cntx ) \ \ - ( cntx->method ) + ( (cntx)->method ) #define bli_cntx_schema_a( cntx ) \ \ - ( cntx->schema_a ) + ( (cntx)->schema_a ) #define bli_cntx_schema_b( cntx ) \ \ - ( cntx->schema_b ) + ( (cntx)->schema_b ) #define bli_cntx_schema_c( cntx ) \ \ - ( cntx->schema_c ) + ( (cntx)->schema_c ) // cntx_t modification (fields only) @@ -178,13 +178,99 @@ typedef struct cntx_s (cntx_p)->schema_c = _schema_c; \ } +// cntx_t query (complex) + +#define bli_cntx_get_blksz_def_dt( dt, bs_id, cntx ) \ +\ + bli_blksz_get_def \ + ( \ + (dt), (&(bli_cntx_blkszs_buf( (cntx) ))[ bs_id ]) \ + ) + +#define bli_cntx_get_blksz_max_dt( dt, bs_id, cntx ) \ +\ + bli_blksz_get_max \ + ( \ + (dt), (&(bli_cntx_blkszs_buf( (cntx) ))[ bs_id ]) \ + ) + +#define bli_cntx_get_bmult_dt( dt, bs_id, cntx ) \ +\ + bli_blksz_get_def \ + ( \ + (dt), \ + (&(bli_cntx_blkszs_buf( (cntx) )) \ + [ \ + (bli_cntx_bmults_buf( (cntx) ))[ bs_id ] \ + ]) \ + ) + +#define bli_cntx_get_l3_ukr_dt( dt, ukr_id, cntx ) \ +\ + bli_func_get_dt \ + ( \ + (dt), \ + &(( \ + bli_cntx_method( (cntx) ) != BLIS_NAT \ + ? 
bli_cntx_l3_vir_ukrs_buf( (cntx) ) \ + : bli_cntx_l3_nat_ukrs_buf( (cntx) ) \ + )[ ukr_id ]) \ + ) + +#define bli_cntx_get_l3_vir_ukr_dt( dt, ukr_id, cntx ) \ +\ + bli_func_get_dt \ + ( \ + (dt), (&(bli_cntx_l3_vir_ukrs_buf( (cntx) ))[ ukr_id ]) \ + ) + +#define bli_cntx_get_l3_nat_ukr_dt( dt, ukr_id, cntx ) \ +\ + bli_func_get_dt \ + ( \ + (dt), (&(bli_cntx_l3_nat_ukrs_buf( (cntx) ))[ ukr_id ]) \ + ) + +#define bli_cntx_get_l1f_ker_dt( dt, ker_id, cntx ) \ +\ + bli_func_get_dt \ + ( \ + (dt), (&(bli_cntx_l1f_kers_buf( (cntx) ))[ ker_id ]) \ + ) + +#define bli_cntx_get_l1v_ker_dt( dt, ker_id, cntx ) \ +\ + bli_func_get_dt \ + ( \ + (dt), (&(bli_cntx_l1v_kers_buf( (cntx) ))[ ker_id ]) \ + ) + +#define bli_cntx_get_l3_nat_ukr_prefs_dt( dt, ukr_id, cntx ) \ +\ + bli_mbool_get_dt \ + ( \ + (dt), (&(bli_cntx_l3_nat_ukrs_prefs_buf( (cntx) ))[ ukr_id ]) \ + ) + +#define bli_cntx_get_ind_method( cntx ) \ +\ + bli_cntx_method( cntx ) + +#define bli_cntx_get_pack_schema_a( cntx ) \ +\ + bli_cntx_schema_a( cntx ) + +#define bli_cntx_get_pack_schema_b( cntx ) \ +\ + bli_cntx_schema_b( cntx ) + + + // ----------------------------------------------------------------------------- // create/free //void bli_cntx_obj_create( cntx_t* cntx ); -//void bli_cntx_obj_copy( cntx_t* src, -// cntx_t* dst ); //void bli_cntx_obj_free( cntx_t* cntx ); void bli_cntx_obj_clear( cntx_t* cntx ); void bli_cntx_init( cntx_t* cntx ); @@ -193,49 +279,53 @@ void bli_cntx_init( cntx_t* cntx ); blksz_t* bli_cntx_get_blksz( bszid_t bs_id, cntx_t* cntx ); -dim_t bli_cntx_get_blksz_def_dt( num_t dt, - bszid_t bs_id, - cntx_t* cntx ); -dim_t bli_cntx_get_blksz_max_dt( num_t dt, - bszid_t bs_id, - cntx_t* cntx ); blksz_t* bli_cntx_get_bmult( bszid_t bs_id, cntx_t* cntx ); -dim_t bli_cntx_get_bmult_dt( num_t dt, - bszid_t bs_id, - cntx_t* cntx ); func_t* bli_cntx_get_l3_ukr( l3ukr_t ukr_id, cntx_t* cntx ); -void* bli_cntx_get_l3_ukr_dt( num_t dt, - l3ukr_t ukr_id, - cntx_t* cntx ); func_t* 
bli_cntx_get_l3_vir_ukr( l3ukr_t ukr_id, cntx_t* cntx ); -void* bli_cntx_get_l3_vir_ukr_dt( num_t dt, - l3ukr_t ukr_id, - cntx_t* cntx ); func_t* bli_cntx_get_l3_nat_ukr( l3ukr_t ukr_id, cntx_t* cntx ); -void* bli_cntx_get_l3_nat_ukr_dt( num_t dt, - l3ukr_t ukr_id, - cntx_t* cntx ); mbool_t* bli_cntx_get_l3_nat_ukr_prefs( l3ukr_t ukr_id, cntx_t* cntx ); func_t* bli_cntx_get_l1f_ker( l1fkr_t ker_id, cntx_t* cntx ); -void* bli_cntx_get_l1f_ker_dt( num_t dt, - l1fkr_t ker_id, - cntx_t* cntx ); func_t* bli_cntx_get_l1v_ker( l1vkr_t ker_id, cntx_t* cntx ); -void* bli_cntx_get_l1v_ker_dt( num_t dt, - l1vkr_t ker_id, - cntx_t* cntx ); func_t* bli_cntx_get_packm_ukr( cntx_t* cntx ); -ind_t bli_cntx_get_ind_method( cntx_t* cntx ); -pack_t bli_cntx_get_pack_schema_a( cntx_t* cntx ); -pack_t bli_cntx_get_pack_schema_b( cntx_t* cntx ); -pack_t bli_cntx_get_pack_schema_c( cntx_t* cntx ); + +//dim_t bli_cntx_get_blksz_def_dt( num_t dt, +// bszid_t bs_id, +// cntx_t* cntx ); +//dim_t bli_cntx_get_blksz_max_dt( num_t dt, +// bszid_t bs_id, +// cntx_t* cntx ); +//dim_t bli_cntx_get_bmult_dt( num_t dt, +// bszid_t bs_id, +// cntx_t* cntx ); +//void* bli_cntx_get_l3_ukr_dt( num_t dt, +// l3ukr_t ukr_id, +// cntx_t* cntx ); +//void* bli_cntx_get_l3_vir_ukr_dt( num_t dt, +// l3ukr_t ukr_id, +// cntx_t* cntx ); +//void* bli_cntx_get_l3_nat_ukr_dt( num_t dt, +// l3ukr_t ukr_id, +// cntx_t* cntx ); +//bool_t bli_cntx_get_l3_nat_ukr_prefs_dt( num_t dt, +// l3ukr_t ukr_id, +// cntx_t* cntx ); +//void* bli_cntx_get_l1f_ker_dt( num_t dt, +// l1fkr_t ker_id, +// cntx_t* cntx ); +//void* bli_cntx_get_l1v_ker_dt( num_t dt, +// l1vkr_t ker_id, +// cntx_t* cntx ); +//ind_t bli_cntx_get_ind_method( cntx_t* cntx ); +//pack_t bli_cntx_get_pack_schema_a( cntx_t* cntx ); +//pack_t bli_cntx_get_pack_schema_b( cntx_t* cntx ); +//pack_t bli_cntx_get_pack_schema_c( cntx_t* cntx ); // set functions @@ -294,10 +384,17 @@ void bli_cntx_print( cntx_t* cntx ); // Preprocess out these calls entirely, since they 
are currently just empty // functions that do nothing. -//#define bli_cntx_obj_create( cntx ) { bli_cntx_obj_clear( cntx ); } -//#define bli_cntx_obj_free( cntx ) { bli_cntx_obj_clear( cntx ); } -#define bli_cntx_obj_create( cntx ) { ; } -#define bli_cntx_obj_free( cntx ) { ; } +#if 0 + #define bli_cntx_obj_create( cntx ) { bli_cntx_obj_clear( cntx ); } + #define bli_cntx_obj_free( cntx ) { bli_cntx_obj_clear( cntx ); } +#else + #define bli_cntx_obj_create( cntx ) { ; } + #define bli_cntx_obj_free( cntx ) { ; } +#endif + +// These macros initialize/finalize a local context if the given context +// pointer is NULL. When initializing, the context address that should +// be used (local or external) is assigned to cntx_p. #define bli_cntx_init_local_if( opname, cntx, cntx_p ) \ \ From e4c54c81463c2a19c9bb6b1f0f1be3fa9d018a45 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Wed, 20 Apr 2016 15:56:46 -0500 Subject: [PATCH 05/14] Change integer type in CBLAS function signatures to f77_int, and add proper const-correctness to BLAS layer. 
--- config/haswell/bli_config.h | 2 +- frame/compat/bla_amax.c | 6 +- frame/compat/bla_amax.h | 4 +- frame/compat/bla_asum.c | 6 +- frame/compat/bla_asum.h | 4 +- frame/compat/bla_axpy.c | 14 +- frame/compat/bla_axpy.h | 8 +- frame/compat/bla_copy.c | 10 +- frame/compat/bla_copy.h | 6 +- frame/compat/bla_dot.c | 26 +- frame/compat/bla_dot.h | 18 +- frame/compat/bla_gemm.c | 30 +- frame/compat/bla_gemm.h | 20 +- frame/compat/bla_gemv.c | 26 +- frame/compat/bla_gemv.h | 16 +- frame/compat/bla_ger.c | 20 +- frame/compat/bla_ger.h | 12 +- frame/compat/bla_hemm.c | 28 +- frame/compat/bla_hemm.h | 18 +- frame/compat/bla_hemv.c | 24 +- frame/compat/bla_hemv.h | 14 +- frame/compat/bla_her.c | 16 +- frame/compat/bla_her.h | 10 +- frame/compat/bla_her2.c | 20 +- frame/compat/bla_her2.h | 12 +- frame/compat/bla_her2k.c | 28 +- frame/compat/bla_her2k.h | 18 +- frame/compat/bla_herk.c | 24 +- frame/compat/bla_herk.h | 16 +- frame/compat/bla_nrm2.c | 6 +- frame/compat/bla_nrm2.h | 4 +- frame/compat/bla_scal.c | 10 +- frame/compat/bla_scal.h | 6 +- frame/compat/bla_swap.c | 10 +- frame/compat/bla_swap.h | 6 +- frame/compat/bla_symm.c | 28 +- frame/compat/bla_symm.h | 18 +- frame/compat/bla_symv.c | 24 +- frame/compat/bla_symv.h | 14 +- frame/compat/bla_syr.c | 16 +- frame/compat/bla_syr.h | 10 +- frame/compat/bla_syr2.c | 20 +- frame/compat/bla_syr2.h | 12 +- frame/compat/bla_syr2k.c | 28 +- frame/compat/bla_syr2k.h | 18 +- frame/compat/bla_syrk.c | 24 +- frame/compat/bla_syrk.h | 16 +- frame/compat/bla_trmm.c | 24 +- frame/compat/bla_trmm.h | 18 +- frame/compat/bla_trmv.c | 16 +- frame/compat/bla_trmv.h | 12 +- frame/compat/bla_trsm.c | 24 +- frame/compat/bla_trsm.h | 18 +- frame/compat/bla_trsv.c | 16 +- frame/compat/bla_trsv.h | 12 +- frame/compat/bli_blas.h | 14 + frame/compat/cblas/bli_cblas.h | 18 +- frame/compat/cblas/f77_sub/f77_amax_sub.c | 4 +- frame/compat/cblas/f77_sub/f77_amax_sub.h | 4 +- frame/compat/cblas/f77_sub/f77_asum_sub.c | 4 +- 
frame/compat/cblas/f77_sub/f77_asum_sub.h | 4 +- frame/compat/cblas/f77_sub/f77_dot_sub.c | 25 +- frame/compat/cblas/f77_sub/f77_dot_sub.h | 19 +- frame/compat/cblas/f77_sub/f77_nrm2_sub.c | 4 +- frame/compat/cblas/f77_sub/f77_nrm2_sub.h | 4 +- frame/compat/cblas/src/cblas.h | 912 +++++++++++----------- frame/compat/cblas/src/cblas_caxpy.c | 12 +- frame/compat/cblas/src/cblas_ccopy.c | 12 +- frame/compat/cblas/src/cblas_cdotc_sub.c | 12 +- frame/compat/cblas/src/cblas_cdotu_sub.c | 12 +- frame/compat/cblas/src/cblas_cgbmv.c | 34 +- frame/compat/cblas/src/cblas_cgemm.c | 24 +- frame/compat/cblas/src/cblas_cgemv.c | 30 +- frame/compat/cblas/src/cblas_cgerc.c | 20 +- frame/compat/cblas/src/cblas_cgeru.c | 16 +- frame/compat/cblas/src/cblas_chbmv.c | 28 +- frame/compat/cblas/src/cblas_chemm.c | 24 +- frame/compat/cblas/src/cblas_chemv.c | 28 +- frame/compat/cblas/src/cblas_cher.c | 20 +- frame/compat/cblas/src/cblas_cher2.c | 28 +- frame/compat/cblas/src/cblas_cher2k.c | 20 +- frame/compat/cblas/src/cblas_cherk.c | 22 +- frame/compat/cblas/src/cblas_chpmv.c | 26 +- frame/compat/cblas/src/cblas_chpr.c | 20 +- frame/compat/cblas/src/cblas_chpr2.c | 24 +- frame/compat/cblas/src/cblas_cscal.c | 12 +- frame/compat/cblas/src/cblas_csscal.c | 12 +- frame/compat/cblas/src/cblas_cswap.c | 12 +- frame/compat/cblas/src/cblas_csymm.c | 24 +- frame/compat/cblas/src/cblas_csyr2k.c | 22 +- frame/compat/cblas/src/cblas_csyrk.c | 22 +- frame/compat/cblas/src/cblas_ctbmv.c | 18 +- frame/compat/cblas/src/cblas_ctbsv.c | 18 +- frame/compat/cblas/src/cblas_ctpmv.c | 16 +- frame/compat/cblas/src/cblas_ctpsv.c | 16 +- frame/compat/cblas/src/cblas_ctrmm.c | 20 +- frame/compat/cblas/src/cblas_ctrmv.c | 18 +- frame/compat/cblas/src/cblas_ctrsm.c | 20 +- frame/compat/cblas/src/cblas_ctrsv.c | 18 +- frame/compat/cblas/src/cblas_dasum.c | 8 +- frame/compat/cblas/src/cblas_daxpy.c | 10 +- frame/compat/cblas/src/cblas_dcopy.c | 10 +- frame/compat/cblas/src/cblas_ddot.c | 10 +- 
frame/compat/cblas/src/cblas_dgbmv.c | 18 +- frame/compat/cblas/src/cblas_dgemm.c | 16 +- frame/compat/cblas/src/cblas_dgemv.c | 16 +- frame/compat/cblas/src/cblas_dger.c | 12 +- frame/compat/cblas/src/cblas_dnrm2.c | 8 +- frame/compat/cblas/src/cblas_drot.c | 10 +- frame/compat/cblas/src/cblas_drotg.c | 6 +- frame/compat/cblas/src/cblas_drotm.c | 10 +- frame/compat/cblas/src/cblas_drotmg.c | 6 +- frame/compat/cblas/src/cblas_dsbmv.c | 16 +- frame/compat/cblas/src/cblas_dscal.c | 10 +- frame/compat/cblas/src/cblas_dsdot.c | 10 +- frame/compat/cblas/src/cblas_dspmv.c | 16 +- frame/compat/cblas/src/cblas_dspr.c | 12 +- frame/compat/cblas/src/cblas_dspr2.c | 12 +- frame/compat/cblas/src/cblas_dswap.c | 10 +- frame/compat/cblas/src/cblas_dsymm.c | 16 +- frame/compat/cblas/src/cblas_dsymv.c | 16 +- frame/compat/cblas/src/cblas_dsyr.c | 12 +- frame/compat/cblas/src/cblas_dsyr2.c | 14 +- frame/compat/cblas/src/cblas_dsyr2k.c | 16 +- frame/compat/cblas/src/cblas_dsyrk.c | 14 +- frame/compat/cblas/src/cblas_dtbmv.c | 14 +- frame/compat/cblas/src/cblas_dtbsv.c | 14 +- frame/compat/cblas/src/cblas_dtpmv.c | 12 +- frame/compat/cblas/src/cblas_dtpsv.c | 12 +- frame/compat/cblas/src/cblas_dtrmm.c | 16 +- frame/compat/cblas/src/cblas_dtrmv.c | 14 +- frame/compat/cblas/src/cblas_dtrsm.c | 16 +- frame/compat/cblas/src/cblas_dtrsv.c | 14 +- frame/compat/cblas/src/cblas_dzasum.c | 8 +- frame/compat/cblas/src/cblas_dznrm2.c | 8 +- frame/compat/cblas/src/cblas_f77.h | 834 ++++---------------- frame/compat/cblas/src/cblas_globals.c | 6 +- frame/compat/cblas/src/cblas_icamax.c | 10 +- frame/compat/cblas/src/cblas_idamax.c | 8 +- frame/compat/cblas/src/cblas_isamax.c | 8 +- frame/compat/cblas/src/cblas_izamax.c | 10 +- frame/compat/cblas/src/cblas_sasum.c | 8 +- frame/compat/cblas/src/cblas_saxpy.c | 10 +- frame/compat/cblas/src/cblas_scasum.c | 8 +- frame/compat/cblas/src/cblas_scnrm2.c | 8 +- frame/compat/cblas/src/cblas_scopy.c | 10 +- frame/compat/cblas/src/cblas_sdot.c | 10 +- 
frame/compat/cblas/src/cblas_sdsdot.c | 10 +- frame/compat/cblas/src/cblas_sgbmv.c | 18 +- frame/compat/cblas/src/cblas_sgemm.c | 16 +- frame/compat/cblas/src/cblas_sgemv.c | 16 +- frame/compat/cblas/src/cblas_sger.c | 12 +- frame/compat/cblas/src/cblas_snrm2.c | 8 +- frame/compat/cblas/src/cblas_srot.c | 10 +- frame/compat/cblas/src/cblas_srotg.c | 6 +- frame/compat/cblas/src/cblas_srotm.c | 10 +- frame/compat/cblas/src/cblas_srotmg.c | 6 +- frame/compat/cblas/src/cblas_ssbmv.c | 14 +- frame/compat/cblas/src/cblas_sscal.c | 10 +- frame/compat/cblas/src/cblas_sspmv.c | 16 +- frame/compat/cblas/src/cblas_sspr.c | 12 +- frame/compat/cblas/src/cblas_sspr2.c | 12 +- frame/compat/cblas/src/cblas_sswap.c | 10 +- frame/compat/cblas/src/cblas_ssymm.c | 16 +- frame/compat/cblas/src/cblas_ssymv.c | 16 +- frame/compat/cblas/src/cblas_ssyr.c | 12 +- frame/compat/cblas/src/cblas_ssyr2.c | 14 +- frame/compat/cblas/src/cblas_ssyr2k.c | 16 +- frame/compat/cblas/src/cblas_ssyrk.c | 14 +- frame/compat/cblas/src/cblas_stbmv.c | 14 +- frame/compat/cblas/src/cblas_stbsv.c | 14 +- frame/compat/cblas/src/cblas_stpmv.c | 12 +- frame/compat/cblas/src/cblas_stpsv.c | 12 +- frame/compat/cblas/src/cblas_strmm.c | 16 +- frame/compat/cblas/src/cblas_strmv.c | 14 +- frame/compat/cblas/src/cblas_strsm.c | 16 +- frame/compat/cblas/src/cblas_strsv.c | 14 +- frame/compat/cblas/src/cblas_xerbla.c | 10 +- frame/compat/cblas/src/cblas_zaxpy.c | 12 +- frame/compat/cblas/src/cblas_zcopy.c | 12 +- frame/compat/cblas/src/cblas_zdotc_sub.c | 12 +- frame/compat/cblas/src/cblas_zdotu_sub.c | 12 +- frame/compat/cblas/src/cblas_zdscal.c | 12 +- frame/compat/cblas/src/cblas_zgbmv.c | 34 +- frame/compat/cblas/src/cblas_zgemm.c | 24 +- frame/compat/cblas/src/cblas_zgemv.c | 32 +- frame/compat/cblas/src/cblas_zgerc.c | 20 +- frame/compat/cblas/src/cblas_zgeru.c | 16 +- frame/compat/cblas/src/cblas_zhbmv.c | 28 +- frame/compat/cblas/src/cblas_zhemm.c | 24 +- frame/compat/cblas/src/cblas_zhemv.c | 28 +- 
frame/compat/cblas/src/cblas_zher.c | 20 +- frame/compat/cblas/src/cblas_zher2.c | 28 +- frame/compat/cblas/src/cblas_zher2k.c | 20 +- frame/compat/cblas/src/cblas_zherk.c | 22 +- frame/compat/cblas/src/cblas_zhpmv.c | 26 +- frame/compat/cblas/src/cblas_zhpr.c | 20 +- frame/compat/cblas/src/cblas_zhpr2.c | 28 +- frame/compat/cblas/src/cblas_zscal.c | 12 +- frame/compat/cblas/src/cblas_zswap.c | 12 +- frame/compat/cblas/src/cblas_zsymm.c | 24 +- frame/compat/cblas/src/cblas_zsyr2k.c | 22 +- frame/compat/cblas/src/cblas_zsyrk.c | 22 +- frame/compat/cblas/src/cblas_ztbmv.c | 18 +- frame/compat/cblas/src/cblas_ztbsv.c | 18 +- frame/compat/cblas/src/cblas_ztpmv.c | 16 +- frame/compat/cblas/src/cblas_ztpsv.c | 16 +- frame/compat/cblas/src/cblas_ztrmm.c | 20 +- frame/compat/cblas/src/cblas_ztrmv.c | 18 +- frame/compat/cblas/src/cblas_ztrsm.c | 24 +- frame/compat/cblas/src/cblas_ztrsv.c | 18 +- frame/compat/check/bla_gemm_check.c | 20 +- frame/compat/check/bla_gemm_check.h | 20 +- frame/compat/check/bla_gemv_check.c | 16 +- frame/compat/check/bla_gemv_check.h | 16 +- frame/compat/check/bla_ger_check.c | 14 +- frame/compat/check/bla_ger_check.h | 14 +- frame/compat/check/bla_hemm_check.c | 18 +- frame/compat/check/bla_hemm_check.h | 18 +- frame/compat/check/bla_hemv_check.c | 14 +- frame/compat/check/bla_hemv_check.h | 14 +- frame/compat/check/bla_her2_check.c | 14 +- frame/compat/check/bla_her2_check.h | 14 +- frame/compat/check/bla_her2k_check.c | 18 +- frame/compat/check/bla_her2k_check.h | 18 +- frame/compat/check/bla_her_check.c | 12 +- frame/compat/check/bla_her_check.h | 12 +- frame/compat/check/bla_herk_check.c | 16 +- frame/compat/check/bla_herk_check.h | 16 +- frame/compat/check/bla_symm_check.c | 18 +- frame/compat/check/bla_symm_check.h | 18 +- frame/compat/check/bla_symv_check.c | 14 +- frame/compat/check/bla_symv_check.h | 14 +- frame/compat/check/bla_syr2_check.c | 14 +- frame/compat/check/bla_syr2_check.h | 14 +- frame/compat/check/bla_syr2k_check.c | 18 +- 
frame/compat/check/bla_syr2k_check.h | 18 +- frame/compat/check/bla_syr_check.c | 12 +- frame/compat/check/bla_syr_check.h | 12 +- frame/compat/check/bla_syrk_check.c | 16 +- frame/compat/check/bla_syrk_check.h | 16 +- frame/compat/check/bla_trmm_check.c | 20 +- frame/compat/check/bla_trmm_check.h | 20 +- frame/compat/check/bla_trmv_check.c | 16 +- frame/compat/check/bla_trmv_check.h | 16 +- frame/compat/check/bla_trsm_check.c | 20 +- frame/compat/check/bla_trsm_check.h | 20 +- frame/compat/check/bla_trsv_check.c | 16 +- frame/compat/check/bla_trsv_check.h | 16 +- frame/compat/f2c/bla_gbmv.c | 28 +- frame/compat/f2c/bla_gbmv.h | 8 +- frame/compat/f2c/bla_hbmv.c | 16 +- frame/compat/f2c/bla_hbmv.h | 4 +- frame/compat/f2c/bla_hpmv.c | 16 +- frame/compat/f2c/bla_hpmv.h | 4 +- frame/compat/f2c/bla_hpr.c | 16 +- frame/compat/f2c/bla_hpr.h | 4 +- frame/compat/f2c/bla_hpr2.c | 16 +- frame/compat/f2c/bla_hpr2.h | 4 +- frame/compat/f2c/bla_lsame.c | 2 +- frame/compat/f2c/bla_lsame.h | 2 +- frame/compat/f2c/bla_rot.c | 8 +- frame/compat/f2c/bla_rot.h | 8 +- frame/compat/f2c/bla_rotg.c | 16 +- frame/compat/f2c/bla_rotm.c | 4 +- frame/compat/f2c/bla_rotm.h | 4 +- frame/compat/f2c/bla_rotmg.c | 4 +- frame/compat/f2c/bla_rotmg.h | 4 +- frame/compat/f2c/bla_sbmv.c | 12 +- frame/compat/f2c/bla_sbmv.h | 4 +- frame/compat/f2c/bla_spmv.c | 12 +- frame/compat/f2c/bla_spmv.h | 4 +- frame/compat/f2c/bla_spr.c | 12 +- frame/compat/f2c/bla_spr.h | 4 +- frame/compat/f2c/bla_spr2.c | 12 +- frame/compat/f2c/bla_spr2.h | 4 +- frame/compat/f2c/bla_tbmv.c | 28 +- frame/compat/f2c/bla_tbmv.h | 8 +- frame/compat/f2c/bla_tbsv.c | 30 +- frame/compat/f2c/bla_tbsv.h | 8 +- frame/compat/f2c/bla_tpmv.c | 28 +- frame/compat/f2c/bla_tpmv.h | 8 +- frame/compat/f2c/bla_tpsv.c | 30 +- frame/compat/f2c/bla_tpsv.h | 8 +- frame/compat/f2c/bla_xerbla.c | 8 +- frame/compat/f2c/bla_xerbla.h | 2 +- frame/compat/f2c/util/bla_c_abs.c | 2 +- frame/compat/f2c/util/bla_c_abs.h | 2 +- frame/compat/f2c/util/bla_c_div.c | 
2 +- frame/compat/f2c/util/bla_c_div.h | 2 +- frame/compat/f2c/util/bla_d_abs.c | 2 +- frame/compat/f2c/util/bla_d_abs.h | 2 +- frame/compat/f2c/util/bla_d_cnjg.c | 2 +- frame/compat/f2c/util/bla_d_cnjg.h | 2 +- frame/compat/f2c/util/bla_d_imag.c | 2 +- frame/compat/f2c/util/bla_d_imag.h | 2 +- frame/compat/f2c/util/bla_d_sign.c | 2 +- frame/compat/f2c/util/bla_d_sign.h | 2 +- frame/compat/f2c/util/bla_f__cabs.h | 2 +- frame/compat/f2c/util/bla_r_abs.c | 2 +- frame/compat/f2c/util/bla_r_abs.h | 2 +- frame/compat/f2c/util/bla_r_cnjg.c | 2 +- frame/compat/f2c/util/bla_r_cnjg.h | 2 +- frame/compat/f2c/util/bla_r_imag.c | 2 +- frame/compat/f2c/util/bla_r_imag.h | 2 +- frame/compat/f2c/util/bla_r_sign.c | 2 +- frame/compat/f2c/util/bla_r_sign.h | 2 +- frame/compat/f2c/util/bla_z_abs.c | 2 +- frame/compat/f2c/util/bla_z_abs.h | 2 +- frame/compat/f2c/util/bla_z_div.c | 2 +- frame/compat/f2c/util/bla_z_div.h | 2 +- frame/include/blis.h | 9 +- 312 files changed, 2467 insertions(+), 3570 deletions(-) diff --git a/config/haswell/bli_config.h b/config/haswell/bli_config.h index 89bba2b20..951b007d8 100644 --- a/config/haswell/bli_config.h +++ b/config/haswell/bli_config.h @@ -35,7 +35,7 @@ #ifndef BLIS_CONFIG_H #define BLIS_CONFIG_H - +#define BLIS_ENABLE_CBLAS #endif diff --git a/frame/compat/bla_amax.c b/frame/compat/bla_amax.c index 47f12de83..24aa192e3 100644 --- a/frame/compat/bla_amax.c +++ b/frame/compat/bla_amax.c @@ -43,8 +43,8 @@ \ f77_int PASTEF772(i,chx,blasname) \ ( \ - f77_int* n, \ - ftype_x* x, f77_int* incx \ + const f77_int* n, \ + const ftype_x* x, const f77_int* incx \ ) \ { \ dim_t n0; \ @@ -68,7 +68,7 @@ f77_int PASTEF772(i,chx,blasname) \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ - bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( n0, (ftype_x*)x, *incx, x0, incx0 ); \ \ /* Call BLIS interface. 
*/ \ PASTEMAC(chx,blisname) \ diff --git a/frame/compat/bla_amax.h b/frame/compat/bla_amax.h index 220a3fb48..f9d8a9957 100644 --- a/frame/compat/bla_amax.h +++ b/frame/compat/bla_amax.h @@ -41,8 +41,8 @@ \ f77_int PASTEF772(i,chx,blasname) \ ( \ - f77_int* n, \ - ftype_x* x, f77_int* incx \ + const f77_int* n, \ + const ftype_x* x, const f77_int* incx \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_asum.c b/frame/compat/bla_asum.c index 8b4291296..df2b174eb 100644 --- a/frame/compat/bla_asum.c +++ b/frame/compat/bla_asum.c @@ -43,8 +43,8 @@ \ ftype_r PASTEF772(chr,chx,blasname) \ ( \ - f77_int* n, \ - ftype_x* x, f77_int* incx \ + const f77_int* n, \ + const ftype_x* x, const f77_int* incx \ ) \ { \ dim_t n0; \ @@ -61,7 +61,7 @@ ftype_r PASTEF772(chr,chx,blasname) \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ - bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( n0, (ftype_x*)x, *incx, x0, incx0 ); \ \ /* Call BLIS interface. 
*/ \ PASTEMAC(chx,blisname) \ diff --git a/frame/compat/bla_asum.h b/frame/compat/bla_asum.h index 406665913..824d437ed 100644 --- a/frame/compat/bla_asum.h +++ b/frame/compat/bla_asum.h @@ -41,8 +41,8 @@ \ ftype_r PASTEF772(chr,chx,blasname) \ ( \ - f77_int* n, \ - ftype_x* x, f77_int* incx \ + const f77_int* n, \ + const ftype_x* x, const f77_int* incx \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_axpy.c b/frame/compat/bla_axpy.c index 53c9e4832..1f4a51857 100644 --- a/frame/compat/bla_axpy.c +++ b/frame/compat/bla_axpy.c @@ -43,10 +43,10 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_int* n, \ - ftype* alpha, \ - ftype* x, f77_int* incx, \ - ftype* y, f77_int* incy \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* x, const f77_int* incx, \ + ftype* y, const f77_int* incy \ ) \ { \ dim_t n0; \ @@ -64,15 +64,15 @@ void PASTEF77(ch,blasname) \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ - bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \ - bli_convert_blas_incv( n0, y, *incy, y0, incy0 ); \ + bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Call BLIS interface. 
*/ \ PASTEMAC(ch,blisname) \ ( \ BLIS_NO_CONJUGATE, \ n0, \ - alpha, \ + (ftype*)alpha, \ x0, incx0, \ y0, incy0, \ NULL \ diff --git a/frame/compat/bla_axpy.h b/frame/compat/bla_axpy.h index a457f294d..1fc0a5bbf 100644 --- a/frame/compat/bla_axpy.h +++ b/frame/compat/bla_axpy.h @@ -41,10 +41,10 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_int* n, \ - ftype* alpha, \ - ftype* x, f77_int* incx, \ - ftype* y, f77_int* incy \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* x, const f77_int* incx, \ + ftype* y, const f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_copy.c b/frame/compat/bla_copy.c index 26a32d3c4..95a558175 100644 --- a/frame/compat/bla_copy.c +++ b/frame/compat/bla_copy.c @@ -43,9 +43,9 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_int* n, \ - ftype* x, f77_int* incx, \ - ftype* y, f77_int* incy \ + const f77_int* n, \ + const ftype* x, const f77_int* incx, \ + ftype* y, const f77_int* incy \ ) \ { \ dim_t n0; \ @@ -63,8 +63,8 @@ void PASTEF77(ch,blasname) \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ - bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \ - bli_convert_blas_incv( n0, y, *incy, y0, incy0 ); \ + bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Call BLIS interface. 
*/ \ PASTEMAC(ch,blisname) \ diff --git a/frame/compat/bla_copy.h b/frame/compat/bla_copy.h index a2f9c4c4f..1b342322c 100644 --- a/frame/compat/bla_copy.h +++ b/frame/compat/bla_copy.h @@ -41,9 +41,9 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_int* n, \ - ftype* x, f77_int* incx, \ - ftype* y, f77_int* incy \ + const f77_int* n, \ + const ftype* x, const f77_int* incx, \ + ftype* y, const f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_dot.c b/frame/compat/bla_dot.c index 949177fc7..11a960677 100644 --- a/frame/compat/bla_dot.c +++ b/frame/compat/bla_dot.c @@ -43,9 +43,9 @@ \ ftype PASTEF772(chxy,blasname,chc) \ ( \ - f77_int* n, \ - ftype* x, f77_int* incx, \ - ftype* y, f77_int* incy \ + const f77_int* n, \ + const ftype* x, const f77_int* incx, \ + const ftype* y, const f77_int* incy \ ) \ { \ dim_t n0; \ @@ -64,8 +64,8 @@ ftype PASTEF772(chxy,blasname,chc) \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ - bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \ - bli_convert_blas_incv( n0, y, *incy, y0, incy0 ); \ + bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Call BLIS interface. */ \ PASTEMAC(chxy,blisname) \ @@ -95,9 +95,9 @@ INSERT_GENTFUNCDOT_BLAS( dot, dotv ) // with result returned in single precision. float PASTEF77(sd,sdot) ( - f77_int* n, - float* x, f77_int* incx, - float* y, f77_int* incy + const f77_int* n, + const float* x, const f77_int* incx, + const float* y, const f77_int* incy ) { return ( float )PASTEF77(d,sdot)( n, @@ -109,9 +109,9 @@ float PASTEF77(sd,sdot) // with result returned in double precision. 
double PASTEF77(d,sdot) ( - f77_int* n, - float* x, f77_int* incx, - float* y, f77_int* incy + const f77_int* n, + const float* x, const f77_int* incx, + const float* y, const f77_int* incy ) { dim_t n0; @@ -129,8 +129,8 @@ double PASTEF77(d,sdot) /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ - bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); - bli_convert_blas_incv( n0, y, *incy, y0, incy0 ); + bli_convert_blas_incv( n0, (float*)x, *incx, x0, incx0 ); + bli_convert_blas_incv( n0, (float*)y, *incy, y0, incy0 ); rho = 0.0; diff --git a/frame/compat/bla_dot.h b/frame/compat/bla_dot.h index 168b739d2..c363d720b 100644 --- a/frame/compat/bla_dot.h +++ b/frame/compat/bla_dot.h @@ -41,9 +41,9 @@ \ ftype PASTEF772(chxy,blasname,chc) \ ( \ - f77_int* n, \ - ftype* x, f77_int* incx, \ - ftype* y, f77_int* incy \ + const f77_int* n, \ + const ftype* x, const f77_int* incx, \ + const ftype* y, const f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS2BLIS @@ -54,15 +54,15 @@ INSERT_GENTPROTDOT_BLAS( dot ) float PASTEF77(sd,sdot) ( - f77_int* n, - float* x, f77_int* incx, - float* y, f77_int* incy + const f77_int* n, + const float* x, const f77_int* incx, + const float* y, const f77_int* incy ); double PASTEF77(d,sdot) ( - f77_int* n, - float* x, f77_int* incx, - float* y, f77_int* incy + const f77_int* n, + const float* x, const f77_int* incx, + const float* y, const f77_int* incy ); #endif diff --git a/frame/compat/bla_gemm.c b/frame/compat/bla_gemm.c index 0cb0bb552..fe5918ec1 100644 --- a/frame/compat/bla_gemm.c +++ b/frame/compat/bla_gemm.c @@ -43,16 +43,16 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* transa, \ - f77_char* transb, \ - f77_int* m, \ - f77_int* n, \ - f77_int* k, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* b, f77_int* ldb, \ - ftype* beta, \ - ftype* c, f77_int* ldc \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ 
+ const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ ) \ { \ trans_t blis_transa; \ @@ -106,11 +106,11 @@ void PASTEF77(ch,blasname) \ m0, \ n0, \ k0, \ - alpha, \ - a, rs_a, cs_a, \ - b, rs_b, cs_b, \ - beta, \ - c, rs_c, cs_c, \ + (ftype*)alpha, \ + (ftype*)a, rs_a, cs_a, \ + (ftype*)b, rs_b, cs_b, \ + (ftype*)beta, \ + (ftype*)c, rs_c, cs_c, \ NULL \ ); \ \ diff --git a/frame/compat/bla_gemm.h b/frame/compat/bla_gemm.h index 3e8993878..723455795 100644 --- a/frame/compat/bla_gemm.h +++ b/frame/compat/bla_gemm.h @@ -41,16 +41,16 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* transa, \ - f77_char* transb, \ - f77_int* m, \ - f77_int* n, \ - f77_int* k, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* b, f77_int* ldb, \ - ftype* beta, \ - ftype* c, f77_int* ldc \ + const f77_char* transa, \ + const f77_char* transb, \ + const f77_int* m, \ + const f77_int* n, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_gemv.c b/frame/compat/bla_gemv.c index ea5076aa1..1723d1797 100644 --- a/frame/compat/bla_gemv.c +++ b/frame/compat/bla_gemv.c @@ -43,14 +43,14 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* transa, \ - f77_int* m, \ - f77_int* n, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* x, f77_int* incx, \ - ftype* beta, \ - ftype* y, f77_int* incy \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* x, const f77_int* incx, \ + const ftype* beta, \ + ftype* y, const f77_int* incy \ ) \ { \ trans_t blis_transa; \ @@ -110,8 +110,8 @@ void PASTEF77(ch,blasname) \ \ /* If the input increments are negative, adjust the pointers so we can use positive 
increments instead. */ \ - bli_convert_blas_incv( n_x, x, *incx, x0, incx0 ); \ - bli_convert_blas_incv( m_y, y, *incy, y0, incy0 ); \ + bli_convert_blas_incv( n_x, (ftype*)x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( m_y, (ftype*)y, *incy, y0, incy0 ); \ \ /* Set the row and column strides of A. */ \ rs_a = 1; \ @@ -124,10 +124,10 @@ void PASTEF77(ch,blasname) \ BLIS_NO_CONJUGATE, \ m0, \ n0, \ - alpha, \ - a, rs_a, cs_a, \ + (ftype*)alpha, \ + (ftype*)a, rs_a, cs_a, \ x0, incx0, \ - beta, \ + (ftype*)beta, \ y0, incy0, \ NULL \ ); \ diff --git a/frame/compat/bla_gemv.h b/frame/compat/bla_gemv.h index 6710aa223..ad512cff6 100644 --- a/frame/compat/bla_gemv.h +++ b/frame/compat/bla_gemv.h @@ -41,14 +41,14 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* transa, \ - f77_int* m, \ - f77_int* n, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* x, f77_int* incx, \ - ftype* beta, \ - ftype* y, f77_int* incy \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* x, const f77_int* incx, \ + const ftype* beta, \ + ftype* y, const f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_ger.c b/frame/compat/bla_ger.c index 12eefbac0..03dd5ee06 100644 --- a/frame/compat/bla_ger.c +++ b/frame/compat/bla_ger.c @@ -43,12 +43,12 @@ \ void PASTEF772(chxy,blasname,chc) \ ( \ - f77_int* m, \ - f77_int* n, \ - ftype* alpha, \ - ftype* x, f77_int* incx, \ - ftype* y, f77_int* incy, \ - ftype* a, f77_int* lda \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* x, const f77_int* incx, \ + const ftype* y, const f77_int* incy, \ + ftype* a, const f77_int* lda \ ) \ { \ dim_t m0, n0; \ @@ -80,8 +80,8 @@ void PASTEF772(chxy,blasname,chc) \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. 
*/ \ - bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ - bli_convert_blas_incv( n0, y, *incy, y0, incy0 ); \ + bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Set the row and column strides of A. */ \ rs_a = 1; \ @@ -94,10 +94,10 @@ void PASTEF772(chxy,blasname,chc) \ blis_conjy, \ m0, \ n0, \ - alpha, \ + (ftype*)alpha, \ x0, incx0, \ y0, incy0, \ - a, rs_a, cs_a, \ + (ftype*)a, rs_a, cs_a, \ NULL \ ); \ \ diff --git a/frame/compat/bla_ger.h b/frame/compat/bla_ger.h index d1f2dc2f8..5b888d6e4 100644 --- a/frame/compat/bla_ger.h +++ b/frame/compat/bla_ger.h @@ -41,12 +41,12 @@ \ void PASTEF772(chxy,blasname,chc) \ ( \ - f77_int* m, \ - f77_int* n, \ - ftype* alpha, \ - ftype* x, f77_int* incx, \ - ftype* y, f77_int* incy, \ - ftype* a, f77_int* lda \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* x, const f77_int* incx, \ + const ftype* y, const f77_int* incy, \ + ftype* a, const f77_int* lda \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_hemm.c b/frame/compat/bla_hemm.c index 7bcc686fe..7cbb3b397 100644 --- a/frame/compat/bla_hemm.c +++ b/frame/compat/bla_hemm.c @@ -43,15 +43,15 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* side, \ - f77_char* uploa, \ - f77_int* m, \ - f77_int* n, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* b, f77_int* ldb, \ - ftype* beta, \ - ftype* c, f77_int* ldc \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ ) \ { \ side_t blis_side; \ @@ -104,11 +104,11 @@ void PASTEF77(ch,blasname) \ BLIS_NO_TRANSPOSE, \ m0, \ n0, \ - alpha, \ - a, rs_a, cs_a, \ - b, rs_b, cs_b, \ - beta, \ - c, rs_c, cs_c, \ + (ftype*)alpha, \ + (ftype*)a, rs_a, cs_a, \ + (ftype*)b, rs_b, cs_b, \ + (ftype*)beta, \ 
+ (ftype*)c, rs_c, cs_c, \ NULL \ ); \ \ diff --git a/frame/compat/bla_hemm.h b/frame/compat/bla_hemm.h index 348371f72..16a77357b 100644 --- a/frame/compat/bla_hemm.h +++ b/frame/compat/bla_hemm.h @@ -41,15 +41,15 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* side, \ - f77_char* uploa, \ - f77_int* m, \ - f77_int* n, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* b, f77_int* ldb, \ - ftype* beta, \ - ftype* c, f77_int* ldc \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_hemv.c b/frame/compat/bla_hemv.c index 9b3e8cc1a..df53f5859 100644 --- a/frame/compat/bla_hemv.c +++ b/frame/compat/bla_hemv.c @@ -43,13 +43,13 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploa, \ - f77_int* m, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* x, f77_int* incx, \ - ftype* beta, \ - ftype* y, f77_int* incy \ + const f77_char* uploa, \ + const f77_int* m, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* x, const f77_int* incx, \ + const ftype* beta, \ + ftype* y, const f77_int* incy \ ) \ { \ uplo_t blis_uploa; \ @@ -84,8 +84,8 @@ void PASTEF77(ch,blasname) \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ - bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ - bli_convert_blas_incv( m0, y, *incy, y0, incy0 ); \ + bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( m0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Set the row and column strides of A. 
*/ \ rs_a = 1; \ @@ -98,10 +98,10 @@ void PASTEF77(ch,blasname) \ BLIS_NO_CONJUGATE, \ BLIS_NO_CONJUGATE, \ m0, \ - alpha, \ - a, rs_a, cs_a, \ + (ftype*)alpha, \ + (ftype*)a, rs_a, cs_a, \ x0, incx0, \ - beta, \ + (ftype*)beta, \ y0, incy0, \ NULL \ ); \ diff --git a/frame/compat/bla_hemv.h b/frame/compat/bla_hemv.h index 5fd030974..b1fedfaad 100644 --- a/frame/compat/bla_hemv.h +++ b/frame/compat/bla_hemv.h @@ -41,13 +41,13 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploa, \ - f77_int* m, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* x, f77_int* incx, \ - ftype* beta, \ - ftype* y, f77_int* incy \ + const f77_char* uploa, \ + const f77_int* m, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* x, const f77_int* incx, \ + const ftype* beta, \ + ftype* y, const f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_her.c b/frame/compat/bla_her.c index 45d255db7..5ba71344c 100644 --- a/frame/compat/bla_her.c +++ b/frame/compat/bla_her.c @@ -43,11 +43,11 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploa, \ - f77_int* m, \ - ftype_r* alpha, \ - ftype* x, f77_int* incx, \ - ftype* a, f77_int* lda \ + const f77_char* uploa, \ + const f77_int* m, \ + const ftype_r* alpha, \ + const ftype* x, const f77_int* incx, \ + ftype* a, const f77_int* lda \ ) \ { \ uplo_t blis_uploa; \ @@ -79,7 +79,7 @@ void PASTEF77(ch,blasname) \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ - bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ \ /* Set the row and column strides of A. 
*/ \ rs_a = 1; \ @@ -91,9 +91,9 @@ void PASTEF77(ch,blasname) \ blis_uploa, \ BLIS_NO_CONJUGATE, \ m0, \ - alpha, \ + (ftype_r*)alpha, \ x0, incx0, \ - a, rs_a, cs_a, \ + (ftype*)a, rs_a, cs_a, \ NULL \ ); \ \ diff --git a/frame/compat/bla_her.h b/frame/compat/bla_her.h index cbae5b0c7..24e78f61b 100644 --- a/frame/compat/bla_her.h +++ b/frame/compat/bla_her.h @@ -41,11 +41,11 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploa, \ - f77_int* m, \ - ftype_r* alpha, \ - ftype* x, f77_int* incx, \ - ftype* a, f77_int* lda \ + const f77_char* uploa, \ + const f77_int* m, \ + const ftype_r* alpha, \ + const ftype* x, const f77_int* incx, \ + ftype* a, const f77_int* lda \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_her2.c b/frame/compat/bla_her2.c index e998da715..69ac084c7 100644 --- a/frame/compat/bla_her2.c +++ b/frame/compat/bla_her2.c @@ -43,12 +43,12 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploa, \ - f77_int* m, \ - ftype* alpha, \ - ftype* x, f77_int* incx, \ - ftype* y, f77_int* incy, \ - ftype* a, f77_int* lda \ + const f77_char* uploa, \ + const f77_int* m, \ + const ftype* alpha, \ + const ftype* x, const f77_int* incx, \ + const ftype* y, const f77_int* incy, \ + ftype* a, const f77_int* lda \ ) \ { \ uplo_t blis_uploa; \ @@ -83,8 +83,8 @@ void PASTEF77(ch,blasname) \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ - bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ - bli_convert_blas_incv( m0, y, *incy, y0, incy0 ); \ + bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( m0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Set the row and column strides of A. 
*/ \ rs_a = 1; \ @@ -97,10 +97,10 @@ void PASTEF77(ch,blasname) \ BLIS_NO_CONJUGATE, \ BLIS_NO_CONJUGATE, \ m0, \ - alpha, \ + (ftype*)alpha, \ x0, incx0, \ y0, incy0, \ - a, rs_a, cs_a, \ + (ftype*)a, rs_a, cs_a, \ NULL \ ); \ \ diff --git a/frame/compat/bla_her2.h b/frame/compat/bla_her2.h index f4db4f711..a4734e6d4 100644 --- a/frame/compat/bla_her2.h +++ b/frame/compat/bla_her2.h @@ -41,12 +41,12 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploa, \ - f77_int* m, \ - ftype* alpha, \ - ftype* x, f77_int* incx, \ - ftype* y, f77_int* incy, \ - ftype* a, f77_int* lda \ + const f77_char* uploa, \ + const f77_int* m, \ + const ftype* alpha, \ + const ftype* x, const f77_int* incx, \ + const ftype* y, const f77_int* incy, \ + ftype* a, const f77_int* lda \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_her2k.c b/frame/compat/bla_her2k.c index b58c7b93b..85b4d522a 100644 --- a/frame/compat/bla_her2k.c +++ b/frame/compat/bla_her2k.c @@ -43,15 +43,15 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploc, \ - f77_char* transa, \ - f77_int* m, \ - f77_int* k, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* b, f77_int* ldb, \ - ftype_r* beta, \ - ftype* c, f77_int* ldc \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ ) \ { \ uplo_t blis_uploc; \ @@ -120,11 +120,11 @@ void PASTEF77(ch,blasname) \ blis_transa, \ m0, \ k0, \ - alpha, \ - a, rs_a, cs_a, \ - b, rs_b, cs_b, \ - beta, \ - c, rs_c, cs_c, \ + (ftype*)alpha, \ + (ftype*)a, rs_a, cs_a, \ + (ftype*)b, rs_b, cs_b, \ + (ftype_r*)beta, \ + (ftype*)c, rs_c, cs_c, \ NULL \ ); \ \ diff --git a/frame/compat/bla_her2k.h b/frame/compat/bla_her2k.h index 055c0fcf2..26b60a9bb 100644 --- a/frame/compat/bla_her2k.h +++ b/frame/compat/bla_her2k.h @@ -41,15 +41,15 @@ \ void 
PASTEF77(ch,blasname) \ ( \ - f77_char* uploc, \ - f77_char* transa, \ - f77_int* m, \ - f77_int* k, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* b, f77_int* ldb, \ - ftype_r* beta, \ - ftype* c, f77_int* ldc \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_herk.c b/frame/compat/bla_herk.c index 17b0bedcd..8ce0961e2 100644 --- a/frame/compat/bla_herk.c +++ b/frame/compat/bla_herk.c @@ -43,14 +43,14 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploc, \ - f77_char* transa, \ - f77_int* m, \ - f77_int* k, \ - ftype_r* alpha, \ - ftype* a, f77_int* lda, \ - ftype_r* beta, \ - ftype* c, f77_int* ldc \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype_r* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ ) \ { \ uplo_t blis_uploc; \ @@ -114,10 +114,10 @@ void PASTEF77(ch,blasname) \ blis_transa, \ m0, \ k0, \ - alpha, \ - a, rs_a, cs_a, \ - beta, \ - c, rs_c, cs_c, \ + (ftype_r*)alpha, \ + (ftype*)a, rs_a, cs_a, \ + (ftype_r*)beta, \ + (ftype*)c, rs_c, cs_c, \ NULL \ ); \ \ diff --git a/frame/compat/bla_herk.h b/frame/compat/bla_herk.h index dfffd0b08..25f711e7c 100644 --- a/frame/compat/bla_herk.h +++ b/frame/compat/bla_herk.h @@ -41,14 +41,14 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploc, \ - f77_char* transa, \ - f77_int* m, \ - f77_int* k, \ - ftype_r* alpha, \ - ftype* a, f77_int* lda, \ - ftype_r* beta, \ - ftype* c, f77_int* ldc \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype_r* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype_r* beta, \ + ftype* c, const f77_int* ldc \ 
); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_nrm2.c b/frame/compat/bla_nrm2.c index a575a4088..1e4c91363 100644 --- a/frame/compat/bla_nrm2.c +++ b/frame/compat/bla_nrm2.c @@ -43,8 +43,8 @@ \ ftype_r PASTEF772(chr,chx,blasname) \ ( \ - f77_int* n, \ - ftype_x* x, f77_int* incx \ + const f77_int* n, \ + const ftype_x* x, const f77_int* incx \ ) \ { \ dim_t n0; \ @@ -61,7 +61,7 @@ ftype_r PASTEF772(chr,chx,blasname) \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ - bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( n0, (ftype_x*)x, *incx, x0, incx0 ); \ \ /* Call BLIS interface. */ \ PASTEMAC(chx,blisname) \ diff --git a/frame/compat/bla_nrm2.h b/frame/compat/bla_nrm2.h index dadbc5fc4..30480eeb2 100644 --- a/frame/compat/bla_nrm2.h +++ b/frame/compat/bla_nrm2.h @@ -41,8 +41,8 @@ \ ftype_r PASTEF772(chr,chx,blasname) \ ( \ - f77_int* n, \ - ftype_x* x, f77_int* incx \ + const f77_int* n, \ + const ftype_x* x, const f77_int* incx \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_scal.c b/frame/compat/bla_scal.c index 9258a073c..e386dd93a 100644 --- a/frame/compat/bla_scal.c +++ b/frame/compat/bla_scal.c @@ -43,9 +43,9 @@ \ void PASTEF772(chx,cha,blasname) \ ( \ - f77_int* n, \ - ftype_a* alpha, \ - ftype_x* x, f77_int* incx \ + const f77_int* n, \ + const ftype_a* alpha, \ + ftype_x* x, const f77_int* incx \ ) \ { \ dim_t n0; \ @@ -62,13 +62,13 @@ void PASTEF772(chx,cha,blasname) \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ - bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( n0, (ftype_x*)x, *incx, x0, incx0 ); \ \ /* NOTE: We do not natively implement BLAS's csscal/zdscal in BLIS. 
that is, we just always sub-optimally implement those cases by casting alpha to ctype_x (potentially the complex domain) and using the homogeneous datatype instance according to that type. */ \ - PASTEMAC2(cha,chx,cast)( alpha, alpha_cast ); \ + PASTEMAC2(cha,chx,cast)( (ftype_a*)alpha, alpha_cast ); \ \ /* Call BLIS interface. */ \ PASTEMAC(chx,blisname) \ diff --git a/frame/compat/bla_scal.h b/frame/compat/bla_scal.h index 7a15364d6..d9f1617e2 100644 --- a/frame/compat/bla_scal.h +++ b/frame/compat/bla_scal.h @@ -41,9 +41,9 @@ \ void PASTEF772(chx,cha,blasname) \ ( \ - f77_int* n, \ - ftype_a* alpha, \ - ftype_x* x, f77_int* incx \ + const f77_int* n, \ + const ftype_a* alpha, \ + ftype_x* x, const f77_int* incx \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_swap.c b/frame/compat/bla_swap.c index cf22603a9..51ab7225a 100644 --- a/frame/compat/bla_swap.c +++ b/frame/compat/bla_swap.c @@ -43,9 +43,9 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_int* n, \ - ftype* x, f77_int* incx, \ - ftype* y, f77_int* incy \ + const f77_int* n, \ + ftype* x, const f77_int* incx, \ + ftype* y, const f77_int* incy \ ) \ { \ dim_t n0; \ @@ -63,8 +63,8 @@ void PASTEF77(ch,blasname) \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ - bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \ - bli_convert_blas_incv( n0, y, *incy, y0, incy0 ); \ + bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Call BLIS interface. 
*/ \ PASTEMAC(ch,blisname) \ diff --git a/frame/compat/bla_swap.h b/frame/compat/bla_swap.h index 53ec754bb..520462fbb 100644 --- a/frame/compat/bla_swap.h +++ b/frame/compat/bla_swap.h @@ -41,9 +41,9 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_int* n, \ - ftype* x, f77_int* incx, \ - ftype* y, f77_int* incy \ + const f77_int* n, \ + ftype* x, const f77_int* incx, \ + ftype* y, const f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_symm.c b/frame/compat/bla_symm.c index 3322faad3..df26a6d22 100644 --- a/frame/compat/bla_symm.c +++ b/frame/compat/bla_symm.c @@ -43,15 +43,15 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* side, \ - f77_char* uploa, \ - f77_int* m, \ - f77_int* n, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* b, f77_int* ldb, \ - ftype* beta, \ - ftype* c, f77_int* ldc \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ ) \ { \ side_t blis_side; \ @@ -104,11 +104,11 @@ void PASTEF77(ch,blasname) \ BLIS_NO_TRANSPOSE, \ m0, \ n0, \ - alpha, \ - a, rs_a, cs_a, \ - b, rs_b, cs_b, \ - beta, \ - c, rs_c, cs_c, \ + (ftype*)alpha, \ + (ftype*)a, rs_a, cs_a, \ + (ftype*)b, rs_b, cs_b, \ + (ftype*)beta, \ + (ftype*)c, rs_c, cs_c, \ NULL \ ); \ \ diff --git a/frame/compat/bla_symm.h b/frame/compat/bla_symm.h index 492bbcdd1..7b311705d 100644 --- a/frame/compat/bla_symm.h +++ b/frame/compat/bla_symm.h @@ -41,15 +41,15 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* side, \ - f77_char* uploa, \ - f77_int* m, \ - f77_int* n, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* b, f77_int* ldb, \ - ftype* beta, \ - ftype* c, f77_int* ldc \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, 
const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_symv.c b/frame/compat/bla_symv.c index 5ed847721..49406afbc 100644 --- a/frame/compat/bla_symv.c +++ b/frame/compat/bla_symv.c @@ -43,13 +43,13 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploa, \ - f77_int* m, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* x, f77_int* incx, \ - ftype* beta, \ - ftype* y, f77_int* incy \ + const f77_char* uploa, \ + const f77_int* m, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* x, const f77_int* incx, \ + const ftype* beta, \ + ftype* y, const f77_int* incy \ ) \ { \ uplo_t blis_uploa; \ @@ -84,8 +84,8 @@ void PASTEF77(ch,blasname) \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ - bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ - bli_convert_blas_incv( m0, y, *incy, y0, incy0 ); \ + bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( m0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Set the row and column strides of A. 
*/ \ rs_a = 1; \ @@ -98,10 +98,10 @@ void PASTEF77(ch,blasname) \ BLIS_NO_CONJUGATE, \ BLIS_NO_CONJUGATE, \ m0, \ - alpha, \ - a, rs_a, cs_a, \ + (ftype*)alpha, \ + (ftype*)a, rs_a, cs_a, \ x0, incx0, \ - beta, \ + (ftype*)beta, \ y0, incy0, \ NULL \ ); \ diff --git a/frame/compat/bla_symv.h b/frame/compat/bla_symv.h index cb9fffcfd..331f0e8d7 100644 --- a/frame/compat/bla_symv.h +++ b/frame/compat/bla_symv.h @@ -41,13 +41,13 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploa, \ - f77_int* m, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* x, f77_int* incx, \ - ftype* beta, \ - ftype* y, f77_int* incy \ + const f77_char* uploa, \ + const f77_int* m, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* x, const f77_int* incx, \ + const ftype* beta, \ + ftype* y, const f77_int* incy \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_syr.c b/frame/compat/bla_syr.c index 112b25c8d..86db9b1e4 100644 --- a/frame/compat/bla_syr.c +++ b/frame/compat/bla_syr.c @@ -43,11 +43,11 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploa, \ - f77_int* m, \ - ftype* alpha, \ - ftype* x, f77_int* incx, \ - ftype* a, f77_int* lda \ + const f77_char* uploa, \ + const f77_int* m, \ + const ftype* alpha, \ + const ftype* x, const f77_int* incx, \ + ftype* a, const f77_int* lda \ ) \ { \ uplo_t blis_uploa; \ @@ -79,7 +79,7 @@ void PASTEF77(ch,blasname) \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ - bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ \ /* Set the row and column strides of A. 
*/ \ rs_a = 1; \ @@ -91,9 +91,9 @@ void PASTEF77(ch,blasname) \ blis_uploa, \ BLIS_NO_CONJUGATE, \ m0, \ - alpha, \ + (ftype*)alpha, \ x0, incx0, \ - a, rs_a, cs_a, \ + (ftype*)a, rs_a, cs_a, \ NULL \ ); \ \ diff --git a/frame/compat/bla_syr.h b/frame/compat/bla_syr.h index c1260bd2f..0847eac96 100644 --- a/frame/compat/bla_syr.h +++ b/frame/compat/bla_syr.h @@ -41,11 +41,11 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploa, \ - f77_int* m, \ - ftype* alpha, \ - ftype* x, f77_int* incx, \ - ftype* a, f77_int* lda \ + const f77_char* uploa, \ + const f77_int* m, \ + const ftype* alpha, \ + const ftype* x, const f77_int* incx, \ + ftype* a, const f77_int* lda \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_syr2.c b/frame/compat/bla_syr2.c index 0a4d5a4b5..f2adf6dc2 100644 --- a/frame/compat/bla_syr2.c +++ b/frame/compat/bla_syr2.c @@ -43,12 +43,12 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploa, \ - f77_int* m, \ - ftype* alpha, \ - ftype* x, f77_int* incx, \ - ftype* y, f77_int* incy, \ - ftype* a, f77_int* lda \ + const f77_char* uploa, \ + const f77_int* m, \ + const ftype* alpha, \ + const ftype* x, const f77_int* incx, \ + const ftype* y, const f77_int* incy, \ + ftype* a, const f77_int* lda \ ) \ { \ uplo_t blis_uploa; \ @@ -84,8 +84,8 @@ void PASTEF77(ch,blasname) \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ - bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ - bli_convert_blas_incv( m0, y, *incy, y0, incy0 ); \ + bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( m0, (ftype*)y, *incy, y0, incy0 ); \ \ /* Set the row and column strides of A. 
*/ \ rs_a = 1; \ @@ -98,10 +98,10 @@ void PASTEF77(ch,blasname) \ BLIS_NO_CONJUGATE, \ BLIS_NO_CONJUGATE, \ m0, \ - alpha, \ + (ftype*)alpha, \ x0, incx0, \ y0, incy0, \ - a, rs_a, cs_a, \ + (ftype*)a, rs_a, cs_a, \ NULL \ ); \ \ diff --git a/frame/compat/bla_syr2.h b/frame/compat/bla_syr2.h index 34ec60d81..0ec658df5 100644 --- a/frame/compat/bla_syr2.h +++ b/frame/compat/bla_syr2.h @@ -41,12 +41,12 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploa, \ - f77_int* m, \ - ftype* alpha, \ - ftype* x, f77_int* incx, \ - ftype* y, f77_int* incy, \ - ftype* a, f77_int* lda \ + const f77_char* uploa, \ + const f77_int* m, \ + const ftype* alpha, \ + const ftype* x, const f77_int* incx, \ + const ftype* y, const f77_int* incy, \ + ftype* a, const f77_int* lda \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_syr2k.c b/frame/compat/bla_syr2k.c index 637d2ac3e..895445d44 100644 --- a/frame/compat/bla_syr2k.c +++ b/frame/compat/bla_syr2k.c @@ -43,15 +43,15 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploc, \ - f77_char* transa, \ - f77_int* m, \ - f77_int* k, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* b, f77_int* ldb, \ - ftype* beta, \ - ftype* c, f77_int* ldc \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ ) \ { \ uplo_t blis_uploc; \ @@ -112,11 +112,11 @@ void PASTEF77(ch,blasname) \ blis_transa, \ m0, \ k0, \ - alpha, \ - a, rs_a, cs_a, \ - b, rs_b, cs_b, \ - beta, \ - c, rs_c, cs_c, \ + (ftype*)alpha, \ + (ftype*)a, rs_a, cs_a, \ + (ftype*)b, rs_b, cs_b, \ + (ftype*)beta, \ + (ftype*)c, rs_c, cs_c, \ NULL \ ); \ \ diff --git a/frame/compat/bla_syr2k.h b/frame/compat/bla_syr2k.h index ac4bbaf47..17bbba537 100644 --- a/frame/compat/bla_syr2k.h +++ b/frame/compat/bla_syr2k.h @@ -41,15 +41,15 @@ \ void PASTEF77(ch,blasname) \ 
( \ - f77_char* uploc, \ - f77_char* transa, \ - f77_int* m, \ - f77_int* k, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* b, f77_int* ldb, \ - ftype* beta, \ - ftype* c, f77_int* ldc \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* b, const f77_int* ldb, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_syrk.c b/frame/compat/bla_syrk.c index d48012b3f..dbde1922c 100644 --- a/frame/compat/bla_syrk.c +++ b/frame/compat/bla_syrk.c @@ -43,14 +43,14 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploc, \ - f77_char* transa, \ - f77_int* m, \ - f77_int* k, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* beta, \ - ftype* c, f77_int* ldc \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ ) \ { \ uplo_t blis_uploc; \ @@ -106,10 +106,10 @@ void PASTEF77(ch,blasname) \ blis_transa, \ m0, \ k0, \ - alpha, \ - a, rs_a, cs_a, \ - beta, \ - c, rs_c, cs_c, \ + (ftype*)alpha, \ + (ftype*)a, rs_a, cs_a, \ + (ftype*)beta, \ + (ftype*)c, rs_c, cs_c, \ NULL \ ); \ \ diff --git a/frame/compat/bla_syrk.h b/frame/compat/bla_syrk.h index 2eed8ddba..384ac5a13 100644 --- a/frame/compat/bla_syrk.h +++ b/frame/compat/bla_syrk.h @@ -41,14 +41,14 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploc, \ - f77_char* transa, \ - f77_int* m, \ - f77_int* k, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* beta, \ - ftype* c, f77_int* ldc \ + const f77_char* uploc, \ + const f77_char* transa, \ + const f77_int* m, \ + const f77_int* k, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + const ftype* beta, \ + ftype* c, const f77_int* ldc \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git 
a/frame/compat/bla_trmm.c b/frame/compat/bla_trmm.c index c591f5b26..42f8b467d 100644 --- a/frame/compat/bla_trmm.c +++ b/frame/compat/bla_trmm.c @@ -43,15 +43,15 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* side, \ - f77_char* uploa, \ - f77_char* transa, \ - f77_char* diaga, \ - f77_int* m, \ - f77_int* n, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* b, f77_int* ldb \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ ) \ { \ side_t blis_side; \ @@ -106,9 +106,9 @@ void PASTEF77(ch,blasname) \ blis_diaga, \ m0, \ n0, \ - alpha, \ - a, rs_a, cs_a, \ - b, rs_b, cs_b, \ + (ftype*)alpha, \ + (ftype*)a, rs_a, cs_a, \ + (ftype*)b, rs_b, cs_b, \ NULL \ ); \ \ diff --git a/frame/compat/bla_trmm.h b/frame/compat/bla_trmm.h index 8ff642a94..4fcfa88e0 100644 --- a/frame/compat/bla_trmm.h +++ b/frame/compat/bla_trmm.h @@ -41,15 +41,15 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* side, \ - f77_char* uploa, \ - f77_char* transa, \ - f77_char* diaga, \ - f77_int* m, \ - f77_int* n, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* b, f77_int* ldb \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_trmv.c b/frame/compat/bla_trmv.c index f47a677ff..b2377da39 100644 --- a/frame/compat/bla_trmv.c +++ b/frame/compat/bla_trmv.c @@ -43,12 +43,12 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploa, \ - f77_char* transa, \ - f77_char* diaga, \ - f77_int* m, \ - ftype* a, f77_int* lda, \ - ftype* x, f77_int* incx \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const 
f77_int* m, \ + const ftype* a, const f77_int* lda, \ + ftype* x, const f77_int* incx \ ) \ { \ uplo_t blis_uploa; \ @@ -87,7 +87,7 @@ void PASTEF77(ch,blasname) \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ - bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ \ /* Set the row and column strides of A. */ \ rs_a = 1; \ @@ -104,7 +104,7 @@ void PASTEF77(ch,blasname) \ blis_diaga, \ m0, \ one_p, \ - a, rs_a, cs_a, \ + (ftype*)a, rs_a, cs_a, \ x0, incx0, \ NULL \ ); \ diff --git a/frame/compat/bla_trmv.h b/frame/compat/bla_trmv.h index fb3ccca09..df6ad8ce0 100644 --- a/frame/compat/bla_trmv.h +++ b/frame/compat/bla_trmv.h @@ -41,12 +41,12 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploa, \ - f77_char* transa, \ - f77_char* diaga, \ - f77_int* m, \ - ftype* a, f77_int* lda, \ - ftype* x, f77_int* incx \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const ftype* a, const f77_int* lda, \ + ftype* x, const f77_int* incx \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_trsm.c b/frame/compat/bla_trsm.c index e028c4b94..c06d9d2fc 100644 --- a/frame/compat/bla_trsm.c +++ b/frame/compat/bla_trsm.c @@ -43,15 +43,15 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* side, \ - f77_char* uploa, \ - f77_char* transa, \ - f77_char* diaga, \ - f77_int* m, \ - f77_int* n, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* b, f77_int* ldb \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ ) \ { \ side_t blis_side; \ @@ -106,9 +106,9 @@ void PASTEF77(ch,blasname) \ blis_diaga, \ m0, \ n0, \ - alpha, \ - a, rs_a, cs_a, \ - b, rs_b, cs_b, \ + (ftype*)alpha, \ + (ftype*)a, rs_a, 
cs_a, \ + (ftype*)b, rs_b, cs_b, \ NULL \ ); \ \ diff --git a/frame/compat/bla_trsm.h b/frame/compat/bla_trsm.h index 8ef12fe9c..ff5a8ee40 100644 --- a/frame/compat/bla_trsm.h +++ b/frame/compat/bla_trsm.h @@ -41,15 +41,15 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* side, \ - f77_char* uploa, \ - f77_char* transa, \ - f77_char* diaga, \ - f77_int* m, \ - f77_int* n, \ - ftype* alpha, \ - ftype* a, f77_int* lda, \ - ftype* b, f77_int* ldb \ + const f77_char* side, \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const f77_int* n, \ + const ftype* alpha, \ + const ftype* a, const f77_int* lda, \ + ftype* b, const f77_int* ldb \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bla_trsv.c b/frame/compat/bla_trsv.c index bff4016de..0fa7ad453 100644 --- a/frame/compat/bla_trsv.c +++ b/frame/compat/bla_trsv.c @@ -43,12 +43,12 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploa, \ - f77_char* transa, \ - f77_char* diaga, \ - f77_int* m, \ - ftype* a, f77_int* lda, \ - ftype* x, f77_int* incx \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const ftype* a, const f77_int* lda, \ + ftype* x, const f77_int* incx \ ) \ { \ uplo_t blis_uploa; \ @@ -87,7 +87,7 @@ void PASTEF77(ch,blasname) \ \ /* If the input increments are negative, adjust the pointers so we can use positive increments instead. */ \ - bli_convert_blas_incv( m0, x, *incx, x0, incx0 ); \ + bli_convert_blas_incv( m0, (ftype*)x, *incx, x0, incx0 ); \ \ /* Set the row and column strides of A. 
*/ \ rs_a = 1; \ @@ -104,7 +104,7 @@ void PASTEF77(ch,blasname) \ blis_diaga, \ m0, \ one_p, \ - a, rs_a, cs_a, \ + (ftype*)a, rs_a, cs_a, \ x0, incx0, \ NULL \ ); \ diff --git a/frame/compat/bla_trsv.h b/frame/compat/bla_trsv.h index 2292ad021..19ea1f387 100644 --- a/frame/compat/bla_trsv.h +++ b/frame/compat/bla_trsv.h @@ -41,12 +41,12 @@ \ void PASTEF77(ch,blasname) \ ( \ - f77_char* uploa, \ - f77_char* transa, \ - f77_char* diaga, \ - f77_int* m, \ - ftype* a, f77_int* lda, \ - ftype* x, f77_int* incx \ + const f77_char* uploa, \ + const f77_char* transa, \ + const f77_char* diaga, \ + const f77_int* m, \ + const ftype* a, const f77_int* lda, \ + ftype* x, const f77_int* incx \ ); #ifdef BLIS_ENABLE_BLAS2BLIS diff --git a/frame/compat/bli_blas.h b/frame/compat/bli_blas.h index a046d305a..5583e456b 100644 --- a/frame/compat/bli_blas.h +++ b/frame/compat/bli_blas.h @@ -32,6 +32,15 @@ */ +#ifdef BLIS_ENABLE_CBLAS + +// If the BLAS compatibility layer was not explicitly enabled, we must +// enable it here. +#ifndef BLIS_ENABLE_BLAS2BLIS +#define BLIS_ENABLE_BLAS2BLIS +#endif + +#endif // BLIS_ENABLE_CBLAS #ifdef BLIS_ENABLE_BLAS2BLIS @@ -85,6 +94,11 @@ #include "bla_scal.h" #include "bla_swap.h" +#include "f77_amax_sub.h" +#include "f77_asum_sub.h" +#include "f77_dot_sub.h" +#include "f77_nrm2_sub.h" + // -- Level-2 BLAS prototypes -- diff --git a/frame/compat/cblas/bli_cblas.h b/frame/compat/cblas/bli_cblas.h index da692d70c..0cffbe9bb 100644 --- a/frame/compat/cblas/bli_cblas.h +++ b/frame/compat/cblas/bli_cblas.h @@ -35,22 +35,14 @@ #ifndef BLIS_CBLAS_H #define BLIS_CBLAS_H - #ifdef BLIS_ENABLE_CBLAS -// If the BLAS compatibility layer was not explicitly enabled, we must -// enable it here. -#ifndef BLIS_ENABLE_BLAS2BLIS -#define BLIS_ENABLE_BLAS2BLIS -#endif -// Force trailing underscores. BLIS does not support any other type of -// Fortran name-mangling. 
-#define ADD_ - -// Define the Fortran integer to be the same kind assumed by BLIS's -// BLAS compatibility layer. -#define F77_INT f77_int +// Undefine these macros so that no internal conversion is done by CBLAS. +// The function signatures have been modified to use the proper integer types +// directly. +#undef F77_INT +#undef F77_CHAR // Include the main CBLAS header so that including this header file // (probably via blis.h) allows applications to access CBLAS diff --git a/frame/compat/cblas/f77_sub/f77_amax_sub.c b/frame/compat/cblas/f77_sub/f77_amax_sub.c index d8402c1c5..a6f55d8c6 100644 --- a/frame/compat/cblas/f77_sub/f77_amax_sub.c +++ b/frame/compat/cblas/f77_sub/f77_amax_sub.c @@ -43,8 +43,8 @@ #define GENTFUNC( ftype_x, chx, blasname, blisname ) \ \ void PASTEF773(i,chx,blasname,sub)( \ - f77_int* n, \ - ftype_x* x, f77_int* incx, \ + const f77_int* n, \ + const ftype_x* x, const f77_int* incx, \ f77_int* rval \ ) \ { \ diff --git a/frame/compat/cblas/f77_sub/f77_amax_sub.h b/frame/compat/cblas/f77_sub/f77_amax_sub.h index e417b1ca7..42ee48d97 100644 --- a/frame/compat/cblas/f77_sub/f77_amax_sub.h +++ b/frame/compat/cblas/f77_sub/f77_amax_sub.h @@ -40,8 +40,8 @@ #define GENTPROT( ftype_x, chx, blasname ) \ \ void PASTEF773(i,chx,blasname,sub)( \ - f77_int* n, \ - ftype_x* x, f77_int* incx, \ + const f77_int* n, \ + const ftype_x* x, const f77_int* incx, \ f77_int* rval \ ); diff --git a/frame/compat/cblas/f77_sub/f77_asum_sub.c b/frame/compat/cblas/f77_sub/f77_asum_sub.c index 56025962c..c8fc069e5 100644 --- a/frame/compat/cblas/f77_sub/f77_asum_sub.c +++ b/frame/compat/cblas/f77_sub/f77_asum_sub.c @@ -43,8 +43,8 @@ #define GENTFUNCR2( ftype_x, ftype_r, chx, chr, blasname, blisname ) \ \ void PASTEF773(chr,chx,blasname,sub)( \ - f77_int* n, \ - ftype_x* x, f77_int* incx, \ + const f77_int* n, \ + const ftype_x* x, const f77_int* incx, \ ftype_r* rval \ ) \ { \ diff --git a/frame/compat/cblas/f77_sub/f77_asum_sub.h 
b/frame/compat/cblas/f77_sub/f77_asum_sub.h index 3517c8df0..284ec760b 100644 --- a/frame/compat/cblas/f77_sub/f77_asum_sub.h +++ b/frame/compat/cblas/f77_sub/f77_asum_sub.h @@ -40,8 +40,8 @@ #define GENTPROTR2( ftype_x, ftype_r, chx, chr, blasname ) \ \ void PASTEF773(chr,chx,blasname,sub)( \ - f77_int* n, \ - ftype_x* x, f77_int* incx, \ + const f77_int* n, \ + const ftype_x* x, const f77_int* incx, \ ftype_r* rval \ ); diff --git a/frame/compat/cblas/f77_sub/f77_dot_sub.c b/frame/compat/cblas/f77_sub/f77_dot_sub.c index a299609d3..0f4b09bc4 100644 --- a/frame/compat/cblas/f77_sub/f77_dot_sub.c +++ b/frame/compat/cblas/f77_sub/f77_dot_sub.c @@ -43,9 +43,9 @@ #define GENTFUNCDOT( ftype, chxy, chc, blis_conjx, blasname, blisname ) \ \ void PASTEF773(chxy,blasname,chc,sub)( \ - f77_int* n, \ - ftype* x, f77_int* incx, \ - ftype* y, f77_int* incy, \ + const f77_int* n, \ + const ftype* x, const f77_int* incx, \ + const ftype* y, const f77_int* incy, \ ftype* rval \ ) \ { \ @@ -62,22 +62,23 @@ INSERT_GENTFUNCDOT_BLAS( dot, NULL ) // Input vectors stored in single precision, computed in double precision, // with result returned in single precision. -void PASTEF772(sds,dot,sub)( f77_int* n, - float* x, f77_int* incx, - float* y, f77_int* incy, +void PASTEF772(sds,dot,sub)( const f77_int* n, + const float* sb, + const float* x, const f77_int* incx, + const float* y, const f77_int* incy, float* rval ) { - *rval = PASTEF77(sds,dot)( n, - x, incx, - y, incy ); + *rval = *sb + PASTEF77(sds,dot)( n, + x, incx, + y, incy ); } // Input vectors stored in single precision, computed in double precision, // with result returned in double precision. 
-void PASTEF772(ds,dot,sub)( f77_int* n, - float* x, f77_int* incx, - float* y, f77_int* incy, +void PASTEF772(ds,dot,sub)( const f77_int* n, + const float* x, const f77_int* incx, + const float* y, const f77_int* incy, double* rval ) { diff --git a/frame/compat/cblas/f77_sub/f77_dot_sub.h b/frame/compat/cblas/f77_sub/f77_dot_sub.h index 94eb0973c..70eeec93e 100644 --- a/frame/compat/cblas/f77_sub/f77_dot_sub.h +++ b/frame/compat/cblas/f77_sub/f77_dot_sub.h @@ -40,9 +40,9 @@ #define GENTPROTDOT( ftype, chxy, chc, blasname ) \ \ void PASTEF773(chxy,blasname,chc,sub)( \ - f77_int* n, \ - ftype* x, f77_int* incx, \ - ftype* y, f77_int* incy, \ + const f77_int* n, \ + const ftype* x, const f77_int* incx, \ + const ftype* y, const f77_int* incy, \ ftype* rval \ ); @@ -52,15 +52,16 @@ INSERT_GENTPROTDOT_BLAS( dot ) // -- "Black sheep" dot product function prototypes -- -void PASTEF772(sds,dot,sub)( f77_int* n, - float* x, f77_int* incx, - float* y, f77_int* incy, +void PASTEF772(sds,dot,sub)( const f77_int* n, + const float* sb, + const float* x, const f77_int* incx, + const float* y, const f77_int* incy, float* rval ); -void PASTEF772(ds,dot,sub)( f77_int* n, - float* x, f77_int* incx, - float* y, f77_int* incy, +void PASTEF772(ds,dot,sub)( const f77_int* n, + const float* x, const f77_int* incx, + const float* y, const f77_int* incy, double* rval ); #endif diff --git a/frame/compat/cblas/f77_sub/f77_nrm2_sub.c b/frame/compat/cblas/f77_sub/f77_nrm2_sub.c index 126ab2397..7534d0cfb 100644 --- a/frame/compat/cblas/f77_sub/f77_nrm2_sub.c +++ b/frame/compat/cblas/f77_sub/f77_nrm2_sub.c @@ -43,8 +43,8 @@ #define GENTFUNCR2( ftype_x, ftype_r, chx, chr, blasname, blisname ) \ \ void PASTEF773(chr,chx,blasname,sub)( \ - f77_int* n, \ - ftype_x* x, f77_int* incx, \ + const f77_int* n, \ + const ftype_x* x, const f77_int* incx, \ ftype_r* rval \ ) \ { \ diff --git a/frame/compat/cblas/f77_sub/f77_nrm2_sub.h b/frame/compat/cblas/f77_sub/f77_nrm2_sub.h index 4686a19c4..d966f15f7 
100644 --- a/frame/compat/cblas/f77_sub/f77_nrm2_sub.h +++ b/frame/compat/cblas/f77_sub/f77_nrm2_sub.h @@ -40,8 +40,8 @@ #define GENTPROTR2( ftype_x, ftype_r, chx, chr, blasname ) \ \ void PASTEF773(chr,chx,blasname,sub)( \ - f77_int* n, \ - ftype_x* x, f77_int* incx, \ + const f77_int* n, \ + const ftype_x* x, const f77_int* incx, \ ftype_r* rval \ ); diff --git a/frame/compat/cblas/src/cblas.h b/frame/compat/cblas/src/cblas.h index f91557e74..f8b4d43a4 100644 --- a/frame/compat/cblas/src/cblas.h +++ b/frame/compat/cblas/src/cblas.h @@ -5,8 +5,6 @@ /* * Enumerated and derived types */ -#define CBLAS_INDEX size_t /* this may vary between platforms */ - enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113}; enum CBLAS_UPLO {CblasUpper=121, CblasLower=122}; @@ -22,52 +20,52 @@ extern "C" { * Prototypes for level 1 BLAS functions (complex are recast as routines) * =========================================================================== */ -float cblas_sdsdot(const int N, const float alpha, const float *X, - const int incX, const float *Y, const int incY); -double cblas_dsdot(const int N, const float *X, const int incX, const float *Y, - const int incY); -float cblas_sdot(const int N, const float *X, const int incX, - const float *Y, const int incY); -double cblas_ddot(const int N, const double *X, const int incX, - const double *Y, const int incY); +float cblas_sdsdot(f77_int N, float alpha, const float *X, + f77_int incX, const float *Y, f77_int incY); +double cblas_dsdot(f77_int N, const float *X, f77_int incX, const float *Y, + f77_int incY); +float cblas_sdot(f77_int N, const float *X, f77_int incX, + const float *Y, f77_int incY); +double cblas_ddot(f77_int N, const double *X, f77_int incX, + const double *Y, f77_int incY); /* * Functions having prefixes Z and C only */ -void cblas_cdotu_sub(const int N, const void *X, const int incX, - const void *Y, const int incY, void *dotu); 
-void cblas_cdotc_sub(const int N, const void *X, const int incX, - const void *Y, const int incY, void *dotc); +void cblas_cdotu_sub(f77_int N, const void *X, f77_int incX, + const void *Y, f77_int incY, void *dotu); +void cblas_cdotc_sub(f77_int N, const void *X, f77_int incX, + const void *Y, f77_int incY, void *dotc); -void cblas_zdotu_sub(const int N, const void *X, const int incX, - const void *Y, const int incY, void *dotu); -void cblas_zdotc_sub(const int N, const void *X, const int incX, - const void *Y, const int incY, void *dotc); +void cblas_zdotu_sub(f77_int N, const void *X, f77_int incX, + const void *Y, f77_int incY, void *dotu); +void cblas_zdotc_sub(f77_int N, const void *X, f77_int incX, + const void *Y, f77_int incY, void *dotc); /* * Functions having prefixes S D SC DZ */ -float cblas_snrm2(const int N, const float *X, const int incX); -float cblas_sasum(const int N, const float *X, const int incX); +float cblas_snrm2(f77_int N, const float *X, f77_int incX); +float cblas_sasum(f77_int N, const float *X, f77_int incX); -double cblas_dnrm2(const int N, const double *X, const int incX); -double cblas_dasum(const int N, const double *X, const int incX); +double cblas_dnrm2(f77_int N, const double *X, f77_int incX); +double cblas_dasum(f77_int N, const double *X, f77_int incX); -float cblas_scnrm2(const int N, const void *X, const int incX); -float cblas_scasum(const int N, const void *X, const int incX); +float cblas_scnrm2(f77_int N, const void *X, f77_int incX); +float cblas_scasum(f77_int N, const void *X, f77_int incX); -double cblas_dznrm2(const int N, const void *X, const int incX); -double cblas_dzasum(const int N, const void *X, const int incX); +double cblas_dznrm2(f77_int N, const void *X, f77_int incX); +double cblas_dzasum(f77_int N, const void *X, f77_int incX); /* * Functions having standard 4 prefixes (S D C Z) */ -CBLAS_INDEX cblas_isamax(const int N, const float *X, const int incX); -CBLAS_INDEX cblas_idamax(const int N, const 
double *X, const int incX); -CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX); -CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX); +f77_int cblas_isamax(f77_int N, const float *X, f77_int incX); +f77_int cblas_idamax(f77_int N, const double *X, f77_int incX); +f77_int cblas_icamax(f77_int N, const void *X, f77_int incX); +f77_int cblas_izamax(f77_int N, const void *X, f77_int incX); /* * =========================================================================== @@ -78,33 +76,33 @@ CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX); /* * Routines with standard 4 prefixes (s, d, c, z) */ -void cblas_sswap(const int N, float *X, const int incX, - float *Y, const int incY); -void cblas_scopy(const int N, const float *X, const int incX, - float *Y, const int incY); -void cblas_saxpy(const int N, const float alpha, const float *X, - const int incX, float *Y, const int incY); +void cblas_sswap(f77_int N, float *X, f77_int incX, + float *Y, f77_int incY); +void cblas_scopy(f77_int N, const float *X, f77_int incX, + float *Y, f77_int incY); +void cblas_saxpy(f77_int N, float alpha, const float *X, + f77_int incX, float *Y, f77_int incY); -void cblas_dswap(const int N, double *X, const int incX, - double *Y, const int incY); -void cblas_dcopy(const int N, const double *X, const int incX, - double *Y, const int incY); -void cblas_daxpy(const int N, const double alpha, const double *X, - const int incX, double *Y, const int incY); +void cblas_dswap(f77_int N, double *X, f77_int incX, + double *Y, f77_int incY); +void cblas_dcopy(f77_int N, const double *X, f77_int incX, + double *Y, f77_int incY); +void cblas_daxpy(f77_int N, double alpha, const double *X, + f77_int incX, double *Y, f77_int incY); -void cblas_cswap(const int N, void *X, const int incX, - void *Y, const int incY); -void cblas_ccopy(const int N, const void *X, const int incX, - void *Y, const int incY); -void cblas_caxpy(const int N, const void *alpha, 
const void *X, - const int incX, void *Y, const int incY); +void cblas_cswap(f77_int N, void *X, f77_int incX, + void *Y, f77_int incY); +void cblas_ccopy(f77_int N, const void *X, f77_int incX, + void *Y, f77_int incY); +void cblas_caxpy(f77_int N, const void *alpha, const void *X, + f77_int incX, void *Y, f77_int incY); -void cblas_zswap(const int N, void *X, const int incX, - void *Y, const int incY); -void cblas_zcopy(const int N, const void *X, const int incX, - void *Y, const int incY); -void cblas_zaxpy(const int N, const void *alpha, const void *X, - const int incX, void *Y, const int incY); +void cblas_zswap(f77_int N, void *X, f77_int incX, + void *Y, f77_int incY); +void cblas_zcopy(f77_int N, const void *X, f77_int incX, + void *Y, f77_int incY); +void cblas_zaxpy(f77_int N, const void *alpha, const void *X, + f77_int incX, void *Y, f77_int incY); /* @@ -112,28 +110,28 @@ void cblas_zaxpy(const int N, const void *alpha, const void *X, */ void cblas_srotg(float *a, float *b, float *c, float *s); void cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P); -void cblas_srot(const int N, float *X, const int incX, - float *Y, const int incY, const float c, const float s); -void cblas_srotm(const int N, float *X, const int incX, - float *Y, const int incY, const float *P); +void cblas_srot(f77_int N, float *X, f77_int incX, + float *Y, f77_int incY, const float c, const float s); +void cblas_srotm(f77_int N, float *X, f77_int incX, + float *Y, f77_int incY, const float *P); void cblas_drotg(double *a, double *b, double *c, double *s); void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P); -void cblas_drot(const int N, double *X, const int incX, - double *Y, const int incY, const double c, const double s); -void cblas_drotm(const int N, double *X, const int incX, - double *Y, const int incY, const double *P); +void cblas_drot(f77_int N, double *X, f77_int incX, + double *Y, f77_int incY, const double c, const double 
s); +void cblas_drotm(f77_int N, double *X, f77_int incX, + double *Y, f77_int incY, const double *P); /* * Routines with S D C Z CS and ZD prefixes */ -void cblas_sscal(const int N, const float alpha, float *X, const int incX); -void cblas_dscal(const int N, const double alpha, double *X, const int incX); -void cblas_cscal(const int N, const void *alpha, void *X, const int incX); -void cblas_zscal(const int N, const void *alpha, void *X, const int incX); -void cblas_csscal(const int N, const float alpha, void *X, const int incX); -void cblas_zdscal(const int N, const double alpha, void *X, const int incX); +void cblas_sscal(f77_int N, float alpha, float *X, f77_int incX); +void cblas_dscal(f77_int N, double alpha, double *X, f77_int incX); +void cblas_cscal(f77_int N, const void *alpha, void *X, f77_int incX); +void cblas_zscal(f77_int N, const void *alpha, void *X, f77_int incX); +void cblas_csscal(f77_int N, float alpha, void *X, f77_int incX); +void cblas_zdscal(f77_int N, double alpha, void *X, f77_int incX); /* * =========================================================================== @@ -144,265 +142,265 @@ void cblas_zdscal(const int N, const double alpha, void *X, const int incX); /* * Routines with standard 4 prefixes (S, D, C, Z) */ -void cblas_sgemv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const float alpha, const float *A, const int lda, - const float *X, const int incX, const float beta, - float *Y, const int incY); -void cblas_sgbmv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const int KL, const int KU, const float alpha, - const float *A, const int lda, const float *X, - const int incX, const float beta, float *Y, const int incY); -void cblas_strmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const float *A, const int lda, - float *X, const int incX); 
-void cblas_stbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const float *A, const int lda, - float *X, const int incX); -void cblas_stpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const float *Ap, float *X, const int incX); -void cblas_strsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const float *A, const int lda, float *X, - const int incX); -void cblas_stbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const float *A, const int lda, - float *X, const int incX); -void cblas_stpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const float *Ap, float *X, const int incX); +void cblas_sgemv(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, + float alpha, const float *A, f77_int lda, + const float *X, f77_int incX, float beta, + float *Y, f77_int incY); +void cblas_sgbmv(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, + f77_int KL, f77_int KU, float alpha, + const float *A, f77_int lda, const float *X, + f77_int incX, float beta, float *Y, f77_int incY); +void cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const float *A, f77_int lda, + float *X, f77_int incX); +void cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, f77_int K, const float *A, f77_int lda, + float *X, f77_int incX); +void cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, 
enum CBLAS_DIAG Diag, + f77_int N, const float *Ap, float *X, f77_int incX); +void cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const float *A, f77_int lda, float *X, + f77_int incX); +void cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, f77_int K, const float *A, f77_int lda, + float *X, f77_int incX); +void cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const float *Ap, float *X, f77_int incX); -void cblas_dgemv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const double alpha, const double *A, const int lda, - const double *X, const int incX, const double beta, - double *Y, const int incY); -void cblas_dgbmv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const int KL, const int KU, const double alpha, - const double *A, const int lda, const double *X, - const int incX, const double beta, double *Y, const int incY); -void cblas_dtrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const double *A, const int lda, - double *X, const int incX); -void cblas_dtbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const double *A, const int lda, - double *X, const int incX); -void cblas_dtpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const double *Ap, double *X, const int incX); -void cblas_dtrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const double *A, const int lda, 
double *X, - const int incX); -void cblas_dtbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const double *A, const int lda, - double *X, const int incX); -void cblas_dtpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const double *Ap, double *X, const int incX); +void cblas_dgemv(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, + double alpha, const double *A, f77_int lda, + const double *X, f77_int incX, double beta, + double *Y, f77_int incY); +void cblas_dgbmv(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, + f77_int KL, f77_int KU, double alpha, + const double *A, f77_int lda, const double *X, + f77_int incX, double beta, double *Y, f77_int incY); +void cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const double *A, f77_int lda, + double *X, f77_int incX); +void cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, f77_int K, const double *A, f77_int lda, + double *X, f77_int incX); +void cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const double *Ap, double *X, f77_int incX); +void cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const double *A, f77_int lda, double *X, + f77_int incX); +void cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, f77_int K, const double *A, f77_int lda, + double *X, f77_int incX); +void cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, 
const double *Ap, double *X, f77_int incX); -void cblas_cgemv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const void *alpha, const void *A, const int lda, - const void *X, const int incX, const void *beta, - void *Y, const int incY); -void cblas_cgbmv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const int KL, const int KU, const void *alpha, - const void *A, const int lda, const void *X, - const int incX, const void *beta, void *Y, const int incY); -void cblas_ctrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *A, const int lda, - void *X, const int incX); -void cblas_ctbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const void *A, const int lda, - void *X, const int incX); -void cblas_ctpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *Ap, void *X, const int incX); -void cblas_ctrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *A, const int lda, void *X, - const int incX); -void cblas_ctbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const void *A, const int lda, - void *X, const int incX); -void cblas_ctpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *Ap, void *X, const int incX); +void cblas_cgemv(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + const void *X, 
f77_int incX, const void *beta, + void *Y, f77_int incY); +void cblas_cgbmv(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, + f77_int KL, f77_int KU, const void *alpha, + const void *A, f77_int lda, const void *X, + f77_int incX, const void *beta, void *Y, f77_int incY); +void cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const void *A, f77_int lda, + void *X, f77_int incX); +void cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, f77_int K, const void *A, f77_int lda, + void *X, f77_int incX); +void cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const void *Ap, void *X, f77_int incX); +void cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const void *A, f77_int lda, void *X, + f77_int incX); +void cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, f77_int K, const void *A, f77_int lda, + void *X, f77_int incX); +void cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const void *Ap, void *X, f77_int incX); -void cblas_zgemv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const void *alpha, const void *A, const int lda, - const void *X, const int incX, const void *beta, - void *Y, const int incY); -void cblas_zgbmv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const int KL, const int KU, const void *alpha, - const void *A, const int lda, const void *X, - const int incX, const void *beta, void *Y, const int incY); -void cblas_ztrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum 
CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *A, const int lda, - void *X, const int incX); -void cblas_ztbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const void *A, const int lda, - void *X, const int incX); -void cblas_ztpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *Ap, void *X, const int incX); -void cblas_ztrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *A, const int lda, void *X, - const int incX); -void cblas_ztbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const void *A, const int lda, - void *X, const int incX); -void cblas_ztpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *Ap, void *X, const int incX); +void cblas_zgemv(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + const void *X, f77_int incX, const void *beta, + void *Y, f77_int incY); +void cblas_zgbmv(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, + f77_int KL, f77_int KU, const void *alpha, + const void *A, f77_int lda, const void *X, + f77_int incX, const void *beta, void *Y, f77_int incY); +void cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const void *A, f77_int lda, + void *X, f77_int incX); +void cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, f77_int K, const void *A, f77_int 
lda, + void *X, f77_int incX); +void cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const void *Ap, void *X, f77_int incX); +void cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const void *A, f77_int lda, void *X, + f77_int incX); +void cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, f77_int K, const void *A, f77_int lda, + void *X, f77_int incX); +void cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const void *Ap, void *X, f77_int incX); /* * Routines with S and D prefixes only */ -void cblas_ssymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const float *A, - const int lda, const float *X, const int incX, - const float beta, float *Y, const int incY); -void cblas_ssbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const int K, const float alpha, const float *A, - const int lda, const float *X, const int incX, - const float beta, float *Y, const int incY); -void cblas_sspmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const float *Ap, - const float *X, const int incX, - const float beta, float *Y, const int incY); -void cblas_sger(const enum CBLAS_ORDER order, const int M, const int N, - const float alpha, const float *X, const int incX, - const float *Y, const int incY, float *A, const int lda); -void cblas_ssyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const float *X, - const int incX, float *A, const int lda); -void cblas_sspr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const float *X, - const int incX, float *Ap); -void cblas_ssyr2(const enum 
CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const float *X, - const int incX, const float *Y, const int incY, float *A, - const int lda); -void cblas_sspr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const float *X, - const int incX, const float *Y, const int incY, float *A); +void cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, float alpha, const float *A, + f77_int lda, const float *X, f77_int incX, + float beta, float *Y, f77_int incY); +void cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, f77_int K, float alpha, const float *A, + f77_int lda, const float *X, f77_int incX, + float beta, float *Y, f77_int incY); +void cblas_sspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, float alpha, const float *Ap, + const float *X, f77_int incX, + float beta, float *Y, f77_int incY); +void cblas_sger(enum CBLAS_ORDER order, f77_int M, f77_int N, + float alpha, const float *X, f77_int incX, + const float *Y, f77_int incY, float *A, f77_int lda); +void cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, float alpha, const float *X, + f77_int incX, float *A, f77_int lda); +void cblas_sspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, float alpha, const float *X, + f77_int incX, float *Ap); +void cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, float alpha, const float *X, + f77_int incX, const float *Y, f77_int incY, float *A, + f77_int lda); +void cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, float alpha, const float *X, + f77_int incX, const float *Y, f77_int incY, float *A); -void cblas_dsymv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const double *A, - const int lda, const double *X, const int incX, - const double beta, double *Y, const int incY); -void cblas_dsbmv(const enum CBLAS_ORDER order, const enum 
CBLAS_UPLO Uplo, - const int N, const int K, const double alpha, const double *A, - const int lda, const double *X, const int incX, - const double beta, double *Y, const int incY); -void cblas_dspmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const double *Ap, - const double *X, const int incX, - const double beta, double *Y, const int incY); -void cblas_dger(const enum CBLAS_ORDER order, const int M, const int N, - const double alpha, const double *X, const int incX, - const double *Y, const int incY, double *A, const int lda); -void cblas_dsyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const double *X, - const int incX, double *A, const int lda); -void cblas_dspr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const double *X, - const int incX, double *Ap); -void cblas_dsyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const double *X, - const int incX, const double *Y, const int incY, double *A, - const int lda); -void cblas_dspr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const double *X, - const int incX, const double *Y, const int incY, double *A); +void cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, double alpha, const double *A, + f77_int lda, const double *X, f77_int incX, + double beta, double *Y, f77_int incY); +void cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, f77_int K, double alpha, const double *A, + f77_int lda, const double *X, f77_int incX, + double beta, double *Y, f77_int incY); +void cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, double alpha, const double *Ap, + const double *X, f77_int incX, + double beta, double *Y, f77_int incY); +void cblas_dger(enum CBLAS_ORDER order, f77_int M, f77_int N, + double alpha, const double *X, 
f77_int incX, + const double *Y, f77_int incY, double *A, f77_int lda); +void cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, double alpha, const double *X, + f77_int incX, double *A, f77_int lda); +void cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, double alpha, const double *X, + f77_int incX, double *Ap); +void cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, double alpha, const double *X, + f77_int incX, const double *Y, f77_int incY, double *A, + f77_int lda); +void cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, double alpha, const double *X, + f77_int incX, const double *Y, f77_int incY, double *A); /* * Routines with C and Z prefixes only */ -void cblas_chemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const void *alpha, const void *A, - const int lda, const void *X, const int incX, - const void *beta, void *Y, const int incY); -void cblas_chbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const int K, const void *alpha, const void *A, - const int lda, const void *X, const int incX, - const void *beta, void *Y, const int incY); -void cblas_chpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const void *alpha, const void *Ap, - const void *X, const int incX, - const void *beta, void *Y, const int incY); -void cblas_cgeru(const enum CBLAS_ORDER order, const int M, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda); -void cblas_cgerc(const enum CBLAS_ORDER order, const int M, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda); -void cblas_cher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const void *X, const int incX, - void *A, const int lda); -void cblas_chpr(const enum CBLAS_ORDER order, const enum 
CBLAS_UPLO Uplo, - const int N, const float alpha, const void *X, - const int incX, void *A); -void cblas_cher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda); -void cblas_chpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *Ap); +void cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, const void *alpha, const void *A, + f77_int lda, const void *X, f77_int incX, + const void *beta, void *Y, f77_int incY); +void cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, f77_int K, const void *alpha, const void *A, + f77_int lda, const void *X, f77_int incX, + const void *beta, void *Y, f77_int incY); +void cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, const void *alpha, const void *Ap, + const void *X, f77_int incX, + const void *beta, void *Y, f77_int incY); +void cblas_cgeru(enum CBLAS_ORDER order, f77_int M, f77_int N, + const void *alpha, const void *X, f77_int incX, + const void *Y, f77_int incY, void *A, f77_int lda); +void cblas_cgerc(enum CBLAS_ORDER order, f77_int M, f77_int N, + const void *alpha, const void *X, f77_int incX, + const void *Y, f77_int incY, void *A, f77_int lda); +void cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, float alpha, const void *X, f77_int incX, + void *A, f77_int lda); +void cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, float alpha, const void *X, + f77_int incX, void *A); +void cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, + const void *alpha, const void *X, f77_int incX, + const void *Y, f77_int incY, void *A, f77_int lda); +void cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, + const void *alpha, const void *X, f77_int incX, + const void 
*Y, f77_int incY, void *Ap); -void cblas_zhemv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const void *alpha, const void *A, - const int lda, const void *X, const int incX, - const void *beta, void *Y, const int incY); -void cblas_zhbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const int K, const void *alpha, const void *A, - const int lda, const void *X, const int incX, - const void *beta, void *Y, const int incY); -void cblas_zhpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const void *alpha, const void *Ap, - const void *X, const int incX, - const void *beta, void *Y, const int incY); -void cblas_zgeru(const enum CBLAS_ORDER order, const int M, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda); -void cblas_zgerc(const enum CBLAS_ORDER order, const int M, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda); -void cblas_zher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const void *X, const int incX, - void *A, const int lda); -void cblas_zhpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const void *X, - const int incX, void *A); -void cblas_zher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda); -void cblas_zhpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *Ap); +void cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, const void *alpha, const void *A, + f77_int lda, const void *X, f77_int incX, + const void *beta, void *Y, f77_int incY); +void cblas_zhbmv(enum CBLAS_ORDER order, 
enum CBLAS_UPLO Uplo, + f77_int N, f77_int K, const void *alpha, const void *A, + f77_int lda, const void *X, f77_int incX, + const void *beta, void *Y, f77_int incY); +void cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, const void *alpha, const void *Ap, + const void *X, f77_int incX, + const void *beta, void *Y, f77_int incY); +void cblas_zgeru(enum CBLAS_ORDER order, f77_int M, f77_int N, + const void *alpha, const void *X, f77_int incX, + const void *Y, f77_int incY, void *A, f77_int lda); +void cblas_zgerc(enum CBLAS_ORDER order, f77_int M, f77_int N, + const void *alpha, const void *X, f77_int incX, + const void *Y, f77_int incY, void *A, f77_int lda); +void cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, double alpha, const void *X, f77_int incX, + void *A, f77_int lda); +void cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, double alpha, const void *X, + f77_int incX, void *A); +void cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, + const void *alpha, const void *X, f77_int incX, + const void *Y, f77_int incY, void *A, f77_int lda); +void cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, f77_int N, + const void *alpha, const void *X, f77_int incX, + const void *Y, f77_int incY, void *Ap); /* * =========================================================================== @@ -413,161 +411,161 @@ void cblas_zhpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, const /* * Routines with standard 4 prefixes (S, D, C, Z) */ -void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_TRANSPOSE TransB, const int M, const int N, - const int K, const float alpha, const float *A, - const int lda, const float *B, const int ldb, - const float beta, float *C, const int ldc); -void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const float alpha, 
const float *A, const int lda, - const float *B, const int ldb, const float beta, - float *C, const int ldc); -void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const float alpha, const float *A, const int lda, - const float beta, float *C, const int ldc); -void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const float alpha, const float *A, const int lda, - const float *B, const int ldb, const float beta, - float *C, const int ldc); -void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const float alpha, const float *A, const int lda, - float *B, const int ldb); -void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const float alpha, const float *A, const int lda, - float *B, const int ldb); +void cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N, + f77_int K, float alpha, const float *A, + f77_int lda, const float *B, f77_int ldb, + float beta, float *C, f77_int ldc); +void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, f77_int M, f77_int N, + float alpha, const float *A, f77_int lda, + const float *B, f77_int ldb, float beta, + float *C, f77_int ldc); +void cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + float alpha, const float *A, f77_int lda, + float beta, float *C, f77_int ldc); +void cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + float alpha, const float *A, f77_int lda, 
+ const float *B, f77_int ldb, float beta, + float *C, f77_int ldc); +void cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_DIAG Diag, f77_int M, f77_int N, + float alpha, const float *A, f77_int lda, + float *B, f77_int ldb); +void cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_DIAG Diag, f77_int M, f77_int N, + float alpha, const float *A, f77_int lda, + float *B, f77_int ldb); -void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_TRANSPOSE TransB, const int M, const int N, - const int K, const double alpha, const double *A, - const int lda, const double *B, const int ldb, - const double beta, double *C, const int ldc); -void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const double alpha, const double *A, const int lda, - const double *B, const int ldb, const double beta, - double *C, const int ldc); -void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const double alpha, const double *A, const int lda, - const double beta, double *C, const int ldc); -void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const double alpha, const double *A, const int lda, - const double *B, const int ldb, const double beta, - double *C, const int ldc); -void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const double alpha, const double *A, const int lda, - double *B, const int ldb); -void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, 
const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const double alpha, const double *A, const int lda, - double *B, const int ldb); +void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N, + f77_int K, double alpha, const double *A, + f77_int lda, const double *B, f77_int ldb, + double beta, double *C, f77_int ldc); +void cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, f77_int M, f77_int N, + double alpha, const double *A, f77_int lda, + const double *B, f77_int ldb, double beta, + double *C, f77_int ldc); +void cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + double alpha, const double *A, f77_int lda, + double beta, double *C, f77_int ldc); +void cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + double alpha, const double *A, f77_int lda, + const double *B, f77_int ldb, double beta, + double *C, f77_int ldc); +void cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_DIAG Diag, f77_int M, f77_int N, + double alpha, const double *A, f77_int lda, + double *B, f77_int ldb); +void cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_DIAG Diag, f77_int M, f77_int N, + double alpha, const double *A, f77_int lda, + double *B, f77_int ldb); -void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_TRANSPOSE TransB, const int M, const int N, - const int K, const void *alpha, const void *A, - const int lda, const void *B, const int ldb, - const void *beta, void *C, const int ldc); -void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const void 
*alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc); -void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *beta, void *C, const int ldc); -void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc); -void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const void *alpha, const void *A, const int lda, - void *B, const int ldb); -void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const void *alpha, const void *A, const int lda, - void *B, const int ldb); +void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N, + f77_int K, const void *alpha, const void *A, + f77_int lda, const void *B, f77_int ldb, + const void *beta, void *C, f77_int ldc); +void cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + const void *B, f77_int ldb, const void *beta, + void *C, f77_int ldc); +void cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + const void *alpha, const void *A, f77_int lda, + const void *beta, void *C, f77_int ldc); +void cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + const void 
*alpha, const void *A, f77_int lda, + const void *B, f77_int ldb, const void *beta, + void *C, f77_int ldc); +void cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_DIAG Diag, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + void *B, f77_int ldb); +void cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_DIAG Diag, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + void *B, f77_int ldb); -void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_TRANSPOSE TransB, const int M, const int N, - const int K, const void *alpha, const void *A, - const int lda, const void *B, const int ldb, - const void *beta, void *C, const int ldc); -void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc); -void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *beta, void *C, const int ldc); -void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc); -void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const void *alpha, const void *A, const int lda, - void *B, const int ldb); -void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum 
CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const void *alpha, const void *A, const int lda, - void *B, const int ldb); +void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N, + f77_int K, const void *alpha, const void *A, + f77_int lda, const void *B, f77_int ldb, + const void *beta, void *C, f77_int ldc); +void cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + const void *B, f77_int ldb, const void *beta, + void *C, f77_int ldc); +void cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + const void *alpha, const void *A, f77_int lda, + const void *beta, void *C, f77_int ldc); +void cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + const void *alpha, const void *A, f77_int lda, + const void *B, f77_int ldb, const void *beta, + void *C, f77_int ldc); +void cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_DIAG Diag, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + void *B, f77_int ldb); +void cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_DIAG Diag, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + void *B, f77_int ldb); /* * Routines with prefixes C and Z only */ -void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc); -void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum 
CBLAS_TRANSPOSE Trans, const int N, const int K, - const float alpha, const void *A, const int lda, - const float beta, void *C, const int ldc); -void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const float beta, - void *C, const int ldc); +void cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + const void *B, f77_int ldb, const void *beta, + void *C, f77_int ldc); +void cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + float alpha, const void *A, f77_int lda, + float beta, void *C, f77_int ldc); +void cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + const void *alpha, const void *A, f77_int lda, + const void *B, f77_int ldb, float beta, + void *C, f77_int ldc); -void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc); -void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const double alpha, const void *A, const int lda, - const double beta, void *C, const int ldc); -void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const double beta, - void *C, const int ldc); +void cblas_zhemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + 
const void *B, f77_int ldb, const void *beta, + void *C, f77_int ldc); +void cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + double alpha, const void *A, f77_int lda, + double beta, void *C, f77_int ldc); +void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + const void *alpha, const void *A, f77_int lda, + const void *B, f77_int ldb, double beta, + void *C, f77_int ldc); -void cblas_xerbla(int p, const char *rout, const char *form, ...); +void cblas_xerbla(f77_int p, const char *rout, const char *form, ...); #ifdef __cplusplus } diff --git a/frame/compat/cblas/src/cblas_caxpy.c b/frame/compat/cblas/src/cblas_caxpy.c index 69c8c2ac4..748d146c6 100644 --- a/frame/compat/cblas/src/cblas_caxpy.c +++ b/frame/compat/cblas/src/cblas_caxpy.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_caxpy.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_caxpy( const int N, const void *alpha, const void *X, - const int incX, void *Y, const int incY) +void cblas_caxpy( f77_int N, const void *alpha, const void *X, + f77_int incX, void *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; @@ -24,6 +20,6 @@ void cblas_caxpy( const int N, const void *alpha, const void *X, #define F77_incX incX #define F77_incY incY #endif - F77_caxpy( &F77_N, alpha, X, &F77_incX, Y, &F77_incY); + F77_caxpy( &F77_N, (scomplex*)alpha, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY); } #endif diff --git a/frame/compat/cblas/src/cblas_ccopy.c b/frame/compat/cblas/src/cblas_ccopy.c index fbd7b7172..8871680b1 100644 --- a/frame/compat/cblas/src/cblas_ccopy.c +++ b/frame/compat/cblas/src/cblas_ccopy.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include 
"bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ccopy.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_ccopy( const int N, const void *X, - const int incX, void *Y, const int incY) +void cblas_ccopy( f77_int N, const void *X, + f77_int incX, void *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; @@ -24,6 +20,6 @@ void cblas_ccopy( const int N, const void *X, #define F77_incX incX #define F77_incY incY #endif - F77_ccopy( &F77_N, X, &F77_incX, Y, &F77_incY); + F77_ccopy( &F77_N, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY); } #endif diff --git a/frame/compat/cblas/src/cblas_cdotc_sub.c b/frame/compat/cblas/src/cblas_cdotc_sub.c index 734db9748..d0800ca4f 100644 --- a/frame/compat/cblas/src/cblas_cdotc_sub.c +++ b/frame/compat/cblas/src/cblas_cdotc_sub.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cdotc_sub.c @@ -15,8 +11,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_cdotc_sub( const int N, const void *X, const int incX, - const void *Y, const int incY,void *dotc) +void cblas_cdotc_sub( f77_int N, const void *X, f77_int incX, + const void *Y, f77_int incY,void *dotc) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; @@ -25,6 +21,6 @@ void cblas_cdotc_sub( const int N, const void *X, const int incX, #define F77_incX incX #define F77_incY incY #endif - F77_cdotc_sub( &F77_N, X, &F77_incX, Y, &F77_incY, dotc); + F77_cdotc_sub( &F77_N, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY, (scomplex*)dotc); } #endif diff --git a/frame/compat/cblas/src/cblas_cdotu_sub.c b/frame/compat/cblas/src/cblas_cdotu_sub.c index 7e0890b57..fd14f5bb4 100644 --- a/frame/compat/cblas/src/cblas_cdotu_sub.c +++ 
b/frame/compat/cblas/src/cblas_cdotu_sub.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cdotu_sub.f @@ -15,8 +11,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_cdotu_sub( const int N, const void *X, - const int incX, const void *Y, const int incY,void *dotu) +void cblas_cdotu_sub( f77_int N, const void *X, + f77_int incX, const void *Y, f77_int incY,void *dotu) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; @@ -25,6 +21,6 @@ void cblas_cdotu_sub( const int N, const void *X, #define F77_incX incX #define F77_incY incY #endif - F77_cdotu_sub( &F77_N, X, &F77_incX, Y, &F77_incY, dotu); + F77_cdotu_sub( &F77_N, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY, (scomplex*)dotu); } #endif diff --git a/frame/compat/cblas/src/cblas_cgbmv.c b/frame/compat/cblas/src/cblas_cgbmv.c index ad04e5101..74872186e 100644 --- a/frame/compat/cblas/src/cblas_cgbmv.c +++ b/frame/compat/cblas/src/cblas_cgbmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cgbmv.c @@ -15,12 +11,12 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_cgbmv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const int KL, const int KU, - const void *alpha, const void *A, const int lda, - const void *X, const int incX, const void *beta, - void *Y, const int incY) +void cblas_cgbmv(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, + f77_int KL, f77_int KU, + const void *alpha, const void *A, f77_int lda, + const void *X, f77_int incX, const void *beta, + void *Y, f77_int incY) { char TA; #ifdef F77_CHAR @@ -37,7 +33,7 @@ void cblas_cgbmv(const enum CBLAS_ORDER order, #define 
F77_lda lda #define F77_KL KL #define F77_KU KU - #define F77_incX incx + #define F77_incX incX #define F77_incY incY #endif int n=0, i=0; @@ -65,8 +61,8 @@ void cblas_cgbmv(const enum CBLAS_ORDER order, #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif - F77_cgbmv(F77_TA, &F77_M, &F77_N, &F77_KL, &F77_KU, alpha, - A, &F77_lda, X, &F77_incX, beta, Y, &F77_incY); + F77_cgbmv(F77_TA, &F77_M, &F77_N, &F77_KL, &F77_KU, (scomplex*)alpha, + (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX, (scomplex*)beta, (scomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { @@ -109,7 +105,7 @@ void cblas_cgbmv(const enum CBLAS_ORDER order, #ifdef F77_INT F77_incX = 1; #else - incx = 1; + incX = 1; #endif if( incY > 0 ) @@ -146,11 +142,11 @@ void cblas_cgbmv(const enum CBLAS_ORDER order, F77_TA = C2F_CHAR(&TA); #endif if (TransA == CblasConjTrans) - F77_cgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, ALPHA, - A ,&F77_lda, x,&F77_incX, BETA, Y, &F77_incY); + F77_cgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, (scomplex*)ALPHA, + (scomplex*)A ,&F77_lda, (scomplex*)x,&F77_incX, (scomplex*)BETA, (scomplex*)Y, &F77_incY); else - F77_cgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, alpha, - A ,&F77_lda, x,&F77_incX, beta, Y, &F77_incY); + F77_cgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, (scomplex*)alpha, + (scomplex*)A ,&F77_lda, (scomplex*)x,&F77_incX, (scomplex*)beta, (scomplex*)Y, &F77_incY); if (TransA == CblasConjTrans) { if (x != X) free(x); diff --git a/frame/compat/cblas/src/cblas_cgemm.c b/frame/compat/cblas/src/cblas_cgemm.c index 4b1e5175c..5680a19f6 100644 --- a/frame/compat/cblas/src/cblas_cgemm.c +++ b/frame/compat/cblas/src/cblas_cgemm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE 
TransA, - const enum CBLAS_TRANSPOSE TransB, const int M, const int N, - const int K, const void *alpha, const void *A, - const int lda, const void *B, const int ldb, - const void *beta, void *C, const int ldc) +void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N, + f77_int K, const void *alpha, const void *A, + f77_int lda, const void *B, f77_int ldb, + const void *beta, void *C, f77_int ldc) { char TA, TB; #ifdef F77_CHAR @@ -75,8 +71,8 @@ void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA F77_TB = C2F_CHAR(&TB); #endif - F77_cgemm(F77_TA, F77_TB, &F77_M, &F77_N, &F77_K, alpha, A, - &F77_lda, B, &F77_ldb, beta, C, &F77_ldc); + F77_cgemm(F77_TA, F77_TB, &F77_M, &F77_N, &F77_K, (scomplex*)alpha, (scomplex*)A, + &F77_lda, (scomplex*)B, &F77_ldb, (scomplex*)beta, (scomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; @@ -105,8 +101,8 @@ void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA F77_TB = C2F_CHAR(&TB); #endif - F77_cgemm(F77_TA, F77_TB, &F77_N, &F77_M, &F77_K, alpha, B, - &F77_ldb, A, &F77_lda, beta, C, &F77_ldc); + F77_cgemm(F77_TA, F77_TB, &F77_N, &F77_M, &F77_K, (scomplex*)alpha, (scomplex*)B, + &F77_ldb, (scomplex*)A, &F77_lda, (scomplex*)beta, (scomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_cgemm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; diff --git a/frame/compat/cblas/src/cblas_cgemv.c b/frame/compat/cblas/src/cblas_cgemv.c index 10e1ac0ef..e5d226262 100644 --- a/frame/compat/cblas/src/cblas_cgemv.c +++ b/frame/compat/cblas/src/cblas_cgemv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cgemv.c @@ -15,11 +11,11 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_cgemv(const enum CBLAS_ORDER order, - 
const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const void *alpha, const void *A, const int lda, - const void *X, const int incX, const void *beta, - void *Y, const int incY) +void cblas_cgemv(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + const void *X, f77_int incX, const void *beta, + void *Y, f77_int incY) { char TA; #ifdef F77_CHAR @@ -33,7 +29,7 @@ void cblas_cgemv(const enum CBLAS_ORDER order, #define F77_M M #define F77_N N #define F77_lda lda - #define F77_incX incx + #define F77_incX incX #define F77_incY incY #endif @@ -64,8 +60,8 @@ void cblas_cgemv(const enum CBLAS_ORDER order, #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif - F77_cgemv(F77_TA, &F77_M, &F77_N, alpha, A, &F77_lda, X, &F77_incX, - beta, Y, &F77_incY); + F77_cgemv(F77_TA, &F77_M, &F77_N, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX, + (scomplex*)beta, (scomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { @@ -141,11 +137,11 @@ void cblas_cgemv(const enum CBLAS_ORDER order, F77_TA = C2F_CHAR(&TA); #endif if (TransA == CblasConjTrans) - F77_cgemv(F77_TA, &F77_N, &F77_M, ALPHA, A, &F77_lda, stx, - &F77_incX, BETA, Y, &F77_incY); + F77_cgemv(F77_TA, &F77_N, &F77_M, (scomplex*)ALPHA, (scomplex*)A, &F77_lda, (scomplex*)stx, + &F77_incX, (scomplex*)BETA, (scomplex*)Y, &F77_incY); else - F77_cgemv(F77_TA, &F77_N, &F77_M, alpha, A, &F77_lda, x, - &F77_incX, beta, Y, &F77_incY); + F77_cgemv(F77_TA, &F77_N, &F77_M, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)x, + &F77_incX, (scomplex*)beta, (scomplex*)Y, &F77_incY); if (TransA == CblasConjTrans) { diff --git a/frame/compat/cblas/src/cblas_cgerc.c b/frame/compat/cblas/src/cblas_cgerc.c index c4e682c68..dc582dd04 100644 --- a/frame/compat/cblas/src/cblas_cgerc.c +++ b/frame/compat/cblas/src/cblas_cgerc.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include 
"bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cgerc.c @@ -15,9 +11,9 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_cgerc(const enum CBLAS_ORDER order, const int M, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda) +void cblas_cgerc(enum CBLAS_ORDER order, f77_int M, f77_int N, + const void *alpha, const void *X, f77_int incX, + const void *Y, f77_int incY, void *A, f77_int lda) { #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; @@ -25,7 +21,7 @@ void cblas_cgerc(const enum CBLAS_ORDER order, const int M, const int N, #define F77_M M #define F77_N N #define F77_incX incX - #define F77_incY incy + #define F77_incY incY #define F77_lda lda #endif @@ -39,7 +35,7 @@ void cblas_cgerc(const enum CBLAS_ORDER order, const int M, const int N, CBLAS_CallFromC = 1; if (order == CblasColMajor) { - F77_cgerc( &F77_M, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, A, + F77_cgerc( &F77_M, &F77_N, (scomplex*)alpha, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY, (scomplex*)A, &F77_lda); } else if (order == CblasRowMajor) { @@ -73,12 +69,12 @@ void cblas_cgerc(const enum CBLAS_ORDER order, const int M, const int N, #ifdef F77_INT F77_incY = 1; #else - incy = 1; + incY = 1; #endif } else y = (float *) Y; - F77_cgeru( &F77_N, &F77_M, alpha, y, &F77_incY, X, &F77_incX, A, + F77_cgeru( &F77_N, &F77_M, (scomplex*)alpha, (scomplex*)y, &F77_incY, (scomplex*)X, &F77_incX, (scomplex*)A, &F77_lda); if(Y!=y) free(y); diff --git a/frame/compat/cblas/src/cblas_cgeru.c b/frame/compat/cblas/src/cblas_cgeru.c index 8e680ace8..6339ef08a 100644 --- a/frame/compat/cblas/src/cblas_cgeru.c +++ b/frame/compat/cblas/src/cblas_cgeru.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * 
cblas_cgeru.c @@ -13,9 +9,9 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_cgeru(const enum CBLAS_ORDER order, const int M, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda) +void cblas_cgeru(enum CBLAS_ORDER order, f77_int M, f77_int N, + const void *alpha, const void *X, f77_int incX, + const void *Y, f77_int incY, void *A, f77_int lda) { #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; @@ -35,13 +31,13 @@ void cblas_cgeru(const enum CBLAS_ORDER order, const int M, const int N, if (order == CblasColMajor) { - F77_cgeru( &F77_M, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, A, + F77_cgeru( &F77_M, &F77_N, (scomplex*)alpha, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY, (scomplex*)A, &F77_lda); } else if (order == CblasRowMajor) { RowMajorStrg = 1; - F77_cgeru( &F77_N, &F77_M, alpha, Y, &F77_incY, X, &F77_incX, A, + F77_cgeru( &F77_N, &F77_M, (scomplex*)alpha, (scomplex*)Y, &F77_incY, (scomplex*)X, &F77_incX, (scomplex*)A, &F77_lda); } else cblas_xerbla(1, "cblas_cgeru","Illegal Order setting, %d\n", order); diff --git a/frame/compat/cblas/src/cblas_chbmv.c b/frame/compat/cblas/src/cblas_chbmv.c index 10ec5a4dd..463f8ef25 100644 --- a/frame/compat/cblas/src/cblas_chbmv.c +++ b/frame/compat/cblas/src/cblas_chbmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_chbmv.c @@ -15,11 +11,11 @@ #include "cblas_f77.h" #include #include -void cblas_chbmv(const enum CBLAS_ORDER order, - const enum CBLAS_UPLO Uplo,const int N,const int K, - const void *alpha, const void *A, const int lda, - const void *X, const int incX, const void *beta, - void *Y, const int incY) +void cblas_chbmv(enum CBLAS_ORDER order, + enum CBLAS_UPLO Uplo,f77_int N,f77_int K, + const void *alpha, const void *A, f77_int lda, + 
const void *X, f77_int incX, const void *beta, + void *Y, f77_int incY) { char UL; #ifdef F77_CHAR @@ -33,7 +29,7 @@ void cblas_chbmv(const enum CBLAS_ORDER order, #define F77_N N #define F77_K K #define F77_lda lda - #define F77_incX incx + #define F77_incX incX #define F77_incY incY #endif int n, i=0; @@ -60,8 +56,8 @@ void cblas_chbmv(const enum CBLAS_ORDER order, #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif - F77_chbmv(F77_UL, &F77_N, &F77_K, alpha, A, &F77_lda, X, - &F77_incX, beta, Y, &F77_incY); + F77_chbmv(F77_UL, &F77_N, &F77_K, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)X, + &F77_incX, (scomplex*)beta, (scomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { @@ -102,7 +98,7 @@ void cblas_chbmv(const enum CBLAS_ORDER order, #ifdef F77_INT F77_incX = 1; #else - incx = 1; + incX = 1; #endif if(incY > 0) @@ -134,8 +130,8 @@ void cblas_chbmv(const enum CBLAS_ORDER order, #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif - F77_chbmv(F77_UL, &F77_N, &F77_K, ALPHA, - A ,&F77_lda, x,&F77_incX, BETA, Y, &F77_incY); + F77_chbmv(F77_UL, &F77_N, &F77_K, (scomplex*)ALPHA, + (scomplex*)A ,&F77_lda, (scomplex*)x,&F77_incX, (scomplex*)BETA, (scomplex*)Y, &F77_incY); } else { diff --git a/frame/compat/cblas/src/cblas_chemm.c b/frame/compat/cblas/src/cblas_chemm.c index ccab2f0bb..e8e1e374d 100644 --- a/frame/compat/cblas/src/cblas_chemm.c +++ b/frame/compat/cblas/src/cblas_chemm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc) +void cblas_chemm(enum CBLAS_ORDER Order, const enum CBLAS_SIDE 
Side, + enum CBLAS_UPLO Uplo, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + const void *B, f77_int ldb, const void *beta, + void *C, f77_int ldc) { char SD, UL; #ifdef F77_CHAR @@ -72,8 +68,8 @@ void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, F77_SD = C2F_CHAR(&SD); #endif - F77_chemm(F77_SD, F77_UL, &F77_M, &F77_N, alpha, A, &F77_lda, - B, &F77_ldb, beta, C, &F77_ldc); + F77_chemm(F77_SD, F77_UL, &F77_M, &F77_N, (scomplex*)alpha, (scomplex*)A, &F77_lda, + (scomplex*)B, &F77_ldb, (scomplex*)beta, (scomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; @@ -102,8 +98,8 @@ void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, F77_SD = C2F_CHAR(&SD); #endif - F77_chemm(F77_SD, F77_UL, &F77_N, &F77_M, alpha, A, - &F77_lda, B, &F77_ldb, beta, C, &F77_ldc); + F77_chemm(F77_SD, F77_UL, &F77_N, &F77_M, (scomplex*)alpha, (scomplex*)A, + &F77_lda, (scomplex*)B, &F77_ldb, (scomplex*)beta, (scomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_chemm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; diff --git a/frame/compat/cblas/src/cblas_chemv.c b/frame/compat/cblas/src/cblas_chemv.c index 22def1465..b1df3ef12 100644 --- a/frame/compat/cblas/src/cblas_chemv.c +++ b/frame/compat/cblas/src/cblas_chemv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_chemv.c @@ -15,11 +11,11 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_chemv(const enum CBLAS_ORDER order, - const enum CBLAS_UPLO Uplo, const int N, - const void *alpha, const void *A, const int lda, - const void *X, const int incX, const void *beta, - void *Y, const int incY) +void cblas_chemv(enum CBLAS_ORDER order, + enum CBLAS_UPLO Uplo, f77_int N, + const void *alpha, const void *A, f77_int lda, + const void *X, f77_int incX, const void 
*beta, + void *Y, f77_int incY) { char UL; #ifdef F77_CHAR @@ -32,7 +28,7 @@ void cblas_chemv(const enum CBLAS_ORDER order, #else #define F77_N N #define F77_lda lda - #define F77_incX incx + #define F77_incX incX #define F77_incY incY #endif int n=0, i=0; @@ -60,8 +56,8 @@ void cblas_chemv(const enum CBLAS_ORDER order, #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif - F77_chemv(F77_UL, &F77_N, alpha, A, &F77_lda, X, &F77_incX, - beta, Y, &F77_incY); + F77_chemv(F77_UL, &F77_N, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX, + (scomplex*)beta, (scomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { @@ -102,7 +98,7 @@ void cblas_chemv(const enum CBLAS_ORDER order, #ifdef F77_INT F77_incX = 1; #else - incx = 1; + incX = 1; #endif if(incY > 0) @@ -135,8 +131,8 @@ void cblas_chemv(const enum CBLAS_ORDER order, #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif - F77_chemv(F77_UL, &F77_N, ALPHA, A, &F77_lda, x, &F77_incX, - BETA, Y, &F77_incY); + F77_chemv(F77_UL, &F77_N, (scomplex*)ALPHA, (scomplex*)A, &F77_lda, (scomplex*)x, &F77_incX, + (scomplex*)BETA, (scomplex*)Y, &F77_incY); } else { diff --git a/frame/compat/cblas/src/cblas_cher.c b/frame/compat/cblas/src/cblas_cher.c index 6f8f805ed..05e1c684a 100644 --- a/frame/compat/cblas/src/cblas_cher.c +++ b/frame/compat/cblas/src/cblas_cher.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cher.c @@ -15,9 +11,9 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_cher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const void *X, const int incX - ,void *A, const int lda) +void cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, float alpha, const void *X, f77_int incX + ,void *A, f77_int lda) { char UL; #ifdef F77_CHAR @@ -31,7 +27,7 @@ void cblas_cher(const enum 
CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, #else #define F77_N N #define F77_lda lda - #define F77_incX incx + #define F77_incX incX #endif int n, i, tincx; float *x=(float *)X, *xx=(float *)X, *tx, *st; @@ -56,7 +52,7 @@ void cblas_cher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_UL = C2F_CHAR(&UL); #endif - F77_cher(F77_UL, &F77_N, &alpha, X, &F77_incX, A, &F77_lda); + F77_cher(F77_UL, &F77_N, &alpha, (scomplex*)X, &F77_incX, (scomplex*)A, &F77_lda); } else if (order == CblasRowMajor) { @@ -101,11 +97,11 @@ void cblas_cher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, #ifdef F77_INT F77_incX = 1; #else - incx = 1; + incX = 1; #endif } else x = (float *) X; - F77_cher(F77_UL, &F77_N, &alpha, x, &F77_incX, A, &F77_lda); + F77_cher(F77_UL, &F77_N, &alpha, (scomplex*)x, &F77_incX, (scomplex*)A, &F77_lda); } else { cblas_xerbla(1, "cblas_cher","Illegal Order setting, %d\n", order); diff --git a/frame/compat/cblas/src/cblas_cher2.c b/frame/compat/cblas/src/cblas_cher2.c index 8ce4a28a6..9b82423d6 100644 --- a/frame/compat/cblas/src/cblas_cher2.c +++ b/frame/compat/cblas/src/cblas_cher2.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cher2.c @@ -15,9 +11,9 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_cher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda) +void cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, const void *alpha, const void *X, f77_int incX, + const void *Y, f77_int incY, void *A, f77_int lda) { char UL; #ifdef F77_CHAR @@ -31,8 +27,8 @@ void cblas_cher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, #else #define F77_N N #define F77_lda lda - #define F77_incX incx - #define 
F77_incY incy + #define F77_incX incX + #define F77_incY incY #endif int n, i, j, tincx, tincy; float *x=(float *)X, *xx=(float *)X, *y=(float *)Y, @@ -58,8 +54,8 @@ void cblas_cher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_UL = C2F_CHAR(&UL); #endif - F77_cher2(F77_UL, &F77_N, alpha, X, &F77_incX, - Y, &F77_incY, A, &F77_lda); + F77_cher2(F77_UL, &F77_N, (scomplex*)alpha, (scomplex*)X, &F77_incX, + (scomplex*)Y, &F77_incY, (scomplex*)A, &F77_lda); } else if (order == CblasRowMajor) { @@ -130,16 +126,16 @@ void cblas_cher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_incX = 1; F77_incY = 1; #else - incx = 1; - incy = 1; + incX = 1; + incY = 1; #endif } else { x = (float *) X; y = (float *) Y; } - F77_cher2(F77_UL, &F77_N, alpha, y, &F77_incY, x, - &F77_incX, A, &F77_lda); + F77_cher2(F77_UL, &F77_N, (scomplex*)alpha, (scomplex*)y, &F77_incY, (scomplex*)x, + &F77_incX, (scomplex*)A, &F77_lda); } else { cblas_xerbla(1, "cblas_cher2","Illegal Order setting, %d\n", order); diff --git a/frame/compat/cblas/src/cblas_cher2k.c b/frame/compat/cblas/src/cblas_cher2k.c index 579d41279..fcd1c4b01 100644 --- a/frame/compat/cblas/src/cblas_cher2k.c +++ b/frame/compat/cblas/src/cblas_cher2k.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const float beta, - void *C, const int ldc) +void cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + const void *alpha, const void *A, f77_int lda, + const void *B, f77_int ldb, float beta, + void *C, f77_int ldc) { 
char UL, TR; #ifdef F77_CHAR @@ -77,7 +73,7 @@ void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, F77_TR = C2F_CHAR(&TR); #endif - F77_cher2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc); + F77_cher2k(F77_UL, F77_TR, &F77_N, &F77_K, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)B, &F77_ldb, &beta, (scomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; @@ -108,7 +104,7 @@ void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, ALPHA[0]= *alp; ALPHA[1]= -alp[1]; - F77_cher2k(F77_UL,F77_TR, &F77_N, &F77_K, ALPHA, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc); + F77_cher2k(F77_UL,F77_TR, &F77_N, &F77_K, (scomplex*)ALPHA, (scomplex*)A, &F77_lda, (scomplex*)B, &F77_ldb, &beta, (scomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_cher2k", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; diff --git a/frame/compat/cblas/src/cblas_cherk.c b/frame/compat/cblas/src/cblas_cherk.c index f4980b6b8..80bc4a7a8 100644 --- a/frame/compat/cblas/src/cblas_cherk.c +++ b/frame/compat/cblas/src/cblas_cherk.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,10 +11,10 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const float alpha, const void *A, const int lda, - const float beta, void *C, const int ldc) +void cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + float alpha, const void *A, f77_int lda, + float beta, void *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR @@ -71,8 +67,8 @@ void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, F77_TR = C2F_CHAR(&TR); #endif - F77_cherk(F77_UL, 
F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda, - &beta, C, &F77_ldc); + F77_cherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, (scomplex*)A, &F77_lda, + &beta, (scomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; @@ -101,8 +97,8 @@ void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, F77_SD = C2F_CHAR(&SD); #endif - F77_cherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda, - &beta, C, &F77_ldc); + F77_cherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, (scomplex*)A, &F77_lda, + &beta, (scomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_cherk", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; diff --git a/frame/compat/cblas/src/cblas_chpmv.c b/frame/compat/cblas/src/cblas_chpmv.c index 04217ba37..5857070a0 100644 --- a/frame/compat/cblas/src/cblas_chpmv.c +++ b/frame/compat/cblas/src/cblas_chpmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_chpmv.c @@ -15,11 +11,11 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_chpmv(const enum CBLAS_ORDER order, - const enum CBLAS_UPLO Uplo,const int N, +void cblas_chpmv(enum CBLAS_ORDER order, + enum CBLAS_UPLO Uplo,f77_int N, const void *alpha, const void *AP, - const void *X, const int incX, const void *beta, - void *Y, const int incY) + const void *X, f77_int incX, const void *beta, + void *Y, f77_int incY) { char UL; #ifdef F77_CHAR @@ -31,7 +27,7 @@ void cblas_chpmv(const enum CBLAS_ORDER order, F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N - #define F77_incX incx + #define F77_incX incX #define F77_incY incY #endif int n, i=0; @@ -58,8 +54,8 @@ void cblas_chpmv(const enum CBLAS_ORDER order, #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif - F77_chpmv(F77_UL, &F77_N, alpha, AP, X, - &F77_incX, beta, Y, &F77_incY); + F77_chpmv(F77_UL, &F77_N, (scomplex*)alpha, 
(scomplex*)AP, (scomplex*)X, + &F77_incX, (scomplex*)beta, (scomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { @@ -100,7 +96,7 @@ void cblas_chpmv(const enum CBLAS_ORDER order, #ifdef F77_INT F77_incX = 1; #else - incx = 1; + incX = 1; #endif if(incY > 0) @@ -134,8 +130,8 @@ void cblas_chpmv(const enum CBLAS_ORDER order, F77_UL = C2F_CHAR(&UL); #endif - F77_chpmv(F77_UL, &F77_N, ALPHA, - AP, x, &F77_incX, BETA, Y, &F77_incY); + F77_chpmv(F77_UL, &F77_N, (scomplex*)ALPHA, + (scomplex*)AP, (scomplex*)x, &F77_incX, (scomplex*)BETA, (scomplex*)Y, &F77_incY); } else { diff --git a/frame/compat/cblas/src/cblas_chpr.c b/frame/compat/cblas/src/cblas_chpr.c index c04869bc6..cd2f5baa3 100644 --- a/frame/compat/cblas/src/cblas_chpr.c +++ b/frame/compat/cblas/src/cblas_chpr.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_chpr.c @@ -15,9 +11,9 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_chpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const void *X, - const int incX, void *A) +void cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, float alpha, const void *X, + f77_int incX, void *A) { char UL; #ifdef F77_CHAR @@ -30,7 +26,7 @@ void cblas_chpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N - #define F77_incX incx + #define F77_incX incX #endif int n, i, tincx; float *x=(float *)X, *xx=(float *)X, *tx, *st; @@ -55,7 +51,7 @@ void cblas_chpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_UL = C2F_CHAR(&UL); #endif - F77_chpr(F77_UL, &F77_N, &alpha, X, &F77_incX, A); + F77_chpr(F77_UL, &F77_N, &alpha, (scomplex*)X, &F77_incX, (scomplex*)A); } else if (order == CblasRowMajor) { @@ -99,12 +95,12 @@ void cblas_chpr(const enum CBLAS_ORDER order, 
const enum CBLAS_UPLO Uplo, #ifdef F77_INT F77_incX = 1; #else - incx = 1; + incX = 1; #endif } else x = (float *) X; - F77_chpr(F77_UL, &F77_N, &alpha, x, &F77_incX, A); + F77_chpr(F77_UL, &F77_N, &alpha, (scomplex*)x, &F77_incX, (scomplex*)A); } else { diff --git a/frame/compat/cblas/src/cblas_chpr2.c b/frame/compat/cblas/src/cblas_chpr2.c index c59f80130..f696083ab 100644 --- a/frame/compat/cblas/src/cblas_chpr2.c +++ b/frame/compat/cblas/src/cblas_chpr2.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_chpr2.c @@ -15,9 +11,9 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_chpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N,const void *alpha, const void *X, - const int incX,const void *Y, const int incY, void *Ap) +void cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N,const void *alpha, const void *X, + f77_int incX,const void *Y, f77_int incY, void *Ap) { char UL; @@ -31,8 +27,8 @@ void cblas_chpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N - #define F77_incX incx - #define F77_incY incy + #define F77_incX incX + #define F77_incY incY #endif int n, i, j, tincx, tincy; float *x=(float *)X, *xx=(float *)X, *y=(float *)Y, @@ -58,7 +54,7 @@ void cblas_chpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_UL = C2F_CHAR(&UL); #endif - F77_chpr2(F77_UL, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, Ap); + F77_chpr2(F77_UL, &F77_N, (scomplex*)alpha, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY, (scomplex*)Ap); } else if (order == CblasRowMajor) { @@ -128,8 +124,8 @@ void cblas_chpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_incX = 1; F77_incY = 1; #else - incx = 1; - incy = 1; + incX = 1; + incY = 1; #endif } else @@ -137,7 +133,7 
@@ void cblas_chpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, x = (float *) X; y = (void *) Y; } - F77_chpr2(F77_UL, &F77_N, alpha, y, &F77_incY, x, &F77_incX, Ap); + F77_chpr2(F77_UL, &F77_N, (scomplex*)alpha, (scomplex*)y, &F77_incY, (scomplex*)x, &F77_incX, (scomplex*)Ap); } else { cblas_xerbla(1, "cblas_chpr2","Illegal Order setting, %d\n", order); diff --git a/frame/compat/cblas/src/cblas_cscal.c b/frame/compat/cblas/src/cblas_cscal.c index 0a7607cb6..966b93ae9 100644 --- a/frame/compat/cblas/src/cblas_cscal.c +++ b/frame/compat/cblas/src/cblas_cscal.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cscal.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_cscal( const int N, const void *alpha, void *X, - const int incX) +void cblas_cscal( f77_int N, const void *alpha, void *X, + f77_int incX) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; @@ -23,6 +19,6 @@ void cblas_cscal( const int N, const void *alpha, void *X, #define F77_N N #define F77_incX incX #endif - F77_cscal( &F77_N, alpha, X, &F77_incX); + F77_cscal( &F77_N, (scomplex*)alpha, (scomplex*)X, &F77_incX); } #endif diff --git a/frame/compat/cblas/src/cblas_csscal.c b/frame/compat/cblas/src/cblas_csscal.c index 653d20d8b..b2940d7d4 100644 --- a/frame/compat/cblas/src/cblas_csscal.c +++ b/frame/compat/cblas/src/cblas_csscal.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_csscal.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_csscal( const int N, const float alpha, void *X, - const int incX) +void cblas_csscal( f77_int N, float alpha, void *X, + f77_int incX) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; @@ -23,6 
+19,6 @@ void cblas_csscal( const int N, const float alpha, void *X, #define F77_N N #define F77_incX incX #endif - F77_csscal( &F77_N, &alpha, X, &F77_incX); + F77_csscal( &F77_N, &alpha, (scomplex*)X, &F77_incX); } #endif diff --git a/frame/compat/cblas/src/cblas_cswap.c b/frame/compat/cblas/src/cblas_cswap.c index 7c77a0c6e..a5713a24c 100644 --- a/frame/compat/cblas/src/cblas_cswap.c +++ b/frame/compat/cblas/src/cblas_cswap.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_cswap.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_cswap( const int N, void *X, const int incX, void *Y, - const int incY) +void cblas_cswap( f77_int N, void *X, f77_int incX, void *Y, + f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; @@ -24,6 +20,6 @@ void cblas_cswap( const int N, void *X, const int incX, void *Y, #define F77_incX incX #define F77_incY incY #endif - F77_cswap( &F77_N, X, &F77_incX, Y, &F77_incY); + F77_cswap( &F77_N, (scomplex*)X, &F77_incX, (scomplex*)Y, &F77_incY); } #endif diff --git a/frame/compat/cblas/src/cblas_csymm.c b/frame/compat/cblas/src/cblas_csymm.c index 17590b45d..22cb266e4 100644 --- a/frame/compat/cblas/src/cblas_csymm.c +++ b/frame/compat/cblas/src/cblas_csymm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc) +void cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE 
Side, + enum CBLAS_UPLO Uplo, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + const void *B, f77_int ldb, const void *beta, + void *C, f77_int ldc) { char SD, UL; #ifdef F77_CHAR @@ -72,8 +68,8 @@ void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, F77_SD = C2F_CHAR(&SD); #endif - F77_csymm(F77_SD, F77_UL, &F77_M, &F77_N, alpha, A, &F77_lda, - B, &F77_ldb, beta, C, &F77_ldc); + F77_csymm(F77_SD, F77_UL, &F77_M, &F77_N, (scomplex*)alpha, (scomplex*)A, &F77_lda, + (scomplex*)B, &F77_ldb, (scomplex*)beta, (scomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; @@ -102,8 +98,8 @@ void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, F77_SD = C2F_CHAR(&SD); #endif - F77_csymm(F77_SD, F77_UL, &F77_N, &F77_M, alpha, A, &F77_lda, - B, &F77_ldb, beta, C, &F77_ldc); + F77_csymm(F77_SD, F77_UL, &F77_N, &F77_M, (scomplex*)alpha, (scomplex*)A, &F77_lda, + (scomplex*)B, &F77_ldb, (scomplex*)beta, (scomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_csymm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; diff --git a/frame/compat/cblas/src/cblas_csyr2k.c b/frame/compat/cblas/src/cblas_csyr2k.c index b40b92ca5..c47072d74 100644 --- a/frame/compat/cblas/src/cblas_csyr2k.c +++ b/frame/compat/cblas/src/cblas_csyr2k.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc) +void cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + const void *alpha, const 
void *A, f77_int lda, + const void *B, f77_int ldb, const void *beta, + void *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR @@ -75,8 +71,8 @@ void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, F77_TR = C2F_CHAR(&TR); #endif - F77_csyr2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, - B, &F77_ldb, beta, C, &F77_ldc); + F77_csyr2k(F77_UL, F77_TR, &F77_N, &F77_K, (scomplex*)alpha, (scomplex*)A, &F77_lda, + (scomplex*)B, &F77_ldb, (scomplex*)beta, (scomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; @@ -105,7 +101,7 @@ void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, F77_TR = C2F_CHAR(&TR); #endif - F77_csyr2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, B, &F77_ldb, beta, C, &F77_ldc); + F77_csyr2k(F77_UL, F77_TR, &F77_N, &F77_K, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)B, &F77_ldb, (scomplex*)beta, (scomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_csyr2k", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; diff --git a/frame/compat/cblas/src/cblas_csyrk.c b/frame/compat/cblas/src/cblas_csyrk.c index c4927d06a..94d9e2a8f 100644 --- a/frame/compat/cblas/src/cblas_csyrk.c +++ b/frame/compat/cblas/src/cblas_csyrk.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,10 +11,10 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *beta, void *C, const int ldc) +void cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + const void *alpha, const void *A, f77_int lda, + const void *beta, void *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR @@ -73,8 +69,8 @@ 
void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, F77_TR = C2F_CHAR(&TR); #endif - F77_csyrk(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, - beta, C, &F77_ldc); + F77_csyrk(F77_UL, F77_TR, &F77_N, &F77_K, (scomplex*)alpha, (scomplex*)A, &F77_lda, + (scomplex*)beta, (scomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; @@ -103,8 +99,8 @@ void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, F77_TR = C2F_CHAR(&TR); #endif - F77_csyrk(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, - beta, C, &F77_ldc); + F77_csyrk(F77_UL, F77_TR, &F77_N, &F77_K, (scomplex*)alpha, (scomplex*)A, &F77_lda, + (scomplex*)beta, (scomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_csyrk", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; diff --git a/frame/compat/cblas/src/cblas_ctbmv.c b/frame/compat/cblas/src/cblas_ctbmv.c index cea3e898b..8dd2499c7 100644 --- a/frame/compat/cblas/src/cblas_ctbmv.c +++ b/frame/compat/cblas/src/cblas_ctbmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ctbmv.c @@ -13,10 +9,10 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_ctbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const void *A, const int lda, - void *X, const int incX) +void cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, f77_int K, const void *A, f77_int lda, + void *X, f77_int incX) { char TA; char UL; @@ -78,7 +74,7 @@ void cblas_ctbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif - F77_ctbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, + F77_ctbmv( F77_UL, F77_TA, F77_DI, 
&F77_N, &F77_K, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX); } else if (order == CblasRowMajor) @@ -141,7 +137,7 @@ void cblas_ctbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_DI = C2F_CHAR(&DI); #endif - F77_ctbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, + F77_ctbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX); if (TransA == CblasConjTrans) diff --git a/frame/compat/cblas/src/cblas_ctbsv.c b/frame/compat/cblas/src/cblas_ctbsv.c index c1d1e4084..d9dfdcd22 100644 --- a/frame/compat/cblas/src/cblas_ctbsv.c +++ b/frame/compat/cblas/src/cblas_ctbsv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ctbsv.c @@ -13,10 +9,10 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_ctbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const void *A, const int lda, - void *X, const int incX) +void cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, f77_int K, const void *A, f77_int lda, + void *X, f77_int incX) { char TA; char UL; @@ -78,7 +74,7 @@ void cblas_ctbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif - F77_ctbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, + F77_ctbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX); } else if (order == CblasRowMajor) @@ -145,7 +141,7 @@ void cblas_ctbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_DI = C2F_CHAR(&DI); #endif - F77_ctbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, + F77_ctbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, (scomplex*)A, &F77_lda, (scomplex*)X, 
&F77_incX); if (TransA == CblasConjTrans) diff --git a/frame/compat/cblas/src/cblas_ctpmv.c b/frame/compat/cblas/src/cblas_ctpmv.c index fe2788d7a..8f9cb33a4 100644 --- a/frame/compat/cblas/src/cblas_ctpmv.c +++ b/frame/compat/cblas/src/cblas_ctpmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ctpmv.c @@ -13,9 +9,9 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_ctpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *Ap, void *X, const int incX) +void cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const void *Ap, void *X, f77_int incX) { char TA; char UL; @@ -75,7 +71,7 @@ void cblas_ctpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif - F77_ctpmv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X, &F77_incX); + F77_ctpmv( F77_UL, F77_TA, F77_DI, &F77_N, (scomplex*)Ap, (scomplex*)X, &F77_incX); } else if (order == CblasRowMajor) { @@ -137,7 +133,7 @@ void cblas_ctpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_DI = C2F_CHAR(&DI); #endif - F77_ctpmv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X,&F77_incX); + F77_ctpmv( F77_UL, F77_TA, F77_DI, &F77_N, (scomplex*)Ap, (scomplex*)X,&F77_incX); if (TransA == CblasConjTrans) { if (N > 0) diff --git a/frame/compat/cblas/src/cblas_ctpsv.c b/frame/compat/cblas/src/cblas_ctpsv.c index 242e69b7b..77d5821e3 100644 --- a/frame/compat/cblas/src/cblas_ctpsv.c +++ b/frame/compat/cblas/src/cblas_ctpsv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ctpsv.c @@ 
-13,9 +9,9 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_ctpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *Ap, void *X, const int incX) +void cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const void *Ap, void *X, f77_int incX) { char TA; char UL; @@ -75,7 +71,7 @@ void cblas_ctpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif - F77_ctpsv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X, &F77_incX); + F77_ctpsv( F77_UL, F77_TA, F77_DI, &F77_N, (scomplex*)Ap, (scomplex*)X, &F77_incX); } else if (order == CblasRowMajor) { @@ -141,7 +137,7 @@ void cblas_ctpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_DI = C2F_CHAR(&DI); #endif - F77_ctpsv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X,&F77_incX); + F77_ctpsv( F77_UL, F77_TA, F77_DI, &F77_N, (scomplex*)Ap, (scomplex*)X,&F77_incX); if (TransA == CblasConjTrans) { diff --git a/frame/compat/cblas/src/cblas_ctrmm.c b/frame/compat/cblas/src/cblas_ctrmm.c index a387d7d1e..7eb621f3b 100644 --- a/frame/compat/cblas/src/cblas_ctrmm.c +++ b/frame/compat/cblas/src/cblas_ctrmm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const void *alpha, const void *A, const int lda, - void *B, const int ldb) +void cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + enum CBLAS_DIAG Diag, f77_int M, f77_int 
N, + const void *alpha, const void *A, f77_int lda, + void *B, f77_int ldb) { char UL, TA, SD, DI; #ifdef F77_CHAR @@ -89,7 +85,7 @@ void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, F77_DI = C2F_CHAR(&DI); #endif - F77_ctrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, alpha, A, &F77_lda, B, &F77_ldb); + F77_ctrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)B, &F77_ldb); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; @@ -141,7 +137,7 @@ void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, F77_DI = C2F_CHAR(&DI); #endif - F77_ctrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, alpha, A, &F77_lda, B, &F77_ldb); + F77_ctrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, (scomplex*)alpha, (scomplex*)A, &F77_lda, (scomplex*)B, &F77_ldb); } else cblas_xerbla(1, "cblas_ctrmm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; diff --git a/frame/compat/cblas/src/cblas_ctrmv.c b/frame/compat/cblas/src/cblas_ctrmv.c index a84752bf4..756720207 100644 --- a/frame/compat/cblas/src/cblas_ctrmv.c +++ b/frame/compat/cblas/src/cblas_ctrmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ctrmv.c @@ -13,10 +9,10 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_ctrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *A, const int lda, - void *X, const int incX) +void cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const void *A, f77_int lda, + void *X, f77_int incX) { char TA; @@ -78,7 +74,7 @@ void cblas_ctrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); 
#endif - F77_ctrmv( F77_UL, F77_TA, F77_DI, &F77_N, A, &F77_lda, X, + F77_ctrmv( F77_UL, F77_TA, F77_DI, &F77_N, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX); } else if (order == CblasRowMajor) @@ -139,7 +135,7 @@ void cblas_ctrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif - F77_ctrmv( F77_UL, F77_TA, F77_DI, &F77_N, A, &F77_lda, X, + F77_ctrmv( F77_UL, F77_TA, F77_DI, &F77_N, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX); if (TransA == CblasConjTrans) { diff --git a/frame/compat/cblas/src/cblas_ctrsm.c b/frame/compat/cblas/src/cblas_ctrsm.c index e1f441983..f6e016a21 100644 --- a/frame/compat/cblas/src/cblas_ctrsm.c +++ b/frame/compat/cblas/src/cblas_ctrsm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const void *alpha, const void *A, const int lda, - void *B, const int ldb) +void cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_DIAG Diag, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + void *B, f77_int ldb) { char UL, TA, SD, DI; #ifdef F77_CHAR @@ -151,8 +147,8 @@ void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, #endif - F77_ctrsm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, alpha, A, - &F77_lda, B, &F77_ldb); + F77_ctrsm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, (scomplex*)alpha, (scomplex*)A, + &F77_lda, (scomplex*)B, &F77_ldb); } else cblas_xerbla(1, "cblas_ctrsm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; diff --git 
a/frame/compat/cblas/src/cblas_ctrsv.c b/frame/compat/cblas/src/cblas_ctrsv.c index f6dfb5215..539d97e55 100644 --- a/frame/compat/cblas/src/cblas_ctrsv.c +++ b/frame/compat/cblas/src/cblas_ctrsv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ctrsv.c @@ -13,10 +9,10 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_ctrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *A, const int lda, void *X, - const int incX) +void cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const void *A, f77_int lda, void *X, + f77_int incX) { char TA; char UL; @@ -77,7 +73,7 @@ void cblas_ctrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif - F77_ctrsv( F77_UL, F77_TA, F77_DI, &F77_N, A, &F77_lda, X, + F77_ctrsv( F77_UL, F77_TA, F77_DI, &F77_N, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX); } else if (order == CblasRowMajor) @@ -140,7 +136,7 @@ void cblas_ctrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif - F77_ctrsv( F77_UL, F77_TA, F77_DI, &F77_N, A, &F77_lda, X, + F77_ctrsv( F77_UL, F77_TA, F77_DI, &F77_N, (scomplex*)A, &F77_lda, (scomplex*)X, &F77_incX); if (TransA == CblasConjTrans) { diff --git a/frame/compat/cblas/src/cblas_dasum.c b/frame/compat/cblas/src/cblas_dasum.c index 141ddf6df..39c2a06b1 100644 --- a/frame/compat/cblas/src/cblas_dasum.c +++ b/frame/compat/cblas/src/cblas_dasum.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dasum.c 
@@ -15,7 +11,7 @@ */ #include "cblas.h" #include "cblas_f77.h" -double cblas_dasum( const int N, const double *X, const int incX) +double cblas_dasum( f77_int N, const double *X, f77_int incX) { double asum; #ifdef F77_INT diff --git a/frame/compat/cblas/src/cblas_daxpy.c b/frame/compat/cblas/src/cblas_daxpy.c index 209b71443..5a3d08d6a 100644 --- a/frame/compat/cblas/src/cblas_daxpy.c +++ b/frame/compat/cblas/src/cblas_daxpy.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_daxpy.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_daxpy( const int N, const double alpha, const double *X, - const int incX, double *Y, const int incY) +void cblas_daxpy( f77_int N, double alpha, const double *X, + f77_int incX, double *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; diff --git a/frame/compat/cblas/src/cblas_dcopy.c b/frame/compat/cblas/src/cblas_dcopy.c index f4809836b..186c3d1d6 100644 --- a/frame/compat/cblas/src/cblas_dcopy.c +++ b/frame/compat/cblas/src/cblas_dcopy.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dcopy.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_dcopy( const int N, const double *X, - const int incX, double *Y, const int incY) +void cblas_dcopy( f77_int N, const double *X, + f77_int incX, double *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; diff --git a/frame/compat/cblas/src/cblas_ddot.c b/frame/compat/cblas/src/cblas_ddot.c index cbe4f441f..b1675d888 100644 --- a/frame/compat/cblas/src/cblas_ddot.c +++ b/frame/compat/cblas/src/cblas_ddot.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include 
"bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ddot.c @@ -15,8 +11,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -double cblas_ddot( const int N, const double *X, - const int incX, const double *Y, const int incY) +double cblas_ddot( f77_int N, const double *X, + f77_int incX, const double *Y, f77_int incY) { double dot; #ifdef F77_INT diff --git a/frame/compat/cblas/src/cblas_dgbmv.c b/frame/compat/cblas/src/cblas_dgbmv.c index 3d194827a..f0a19d5e8 100644 --- a/frame/compat/cblas/src/cblas_dgbmv.c +++ b/frame/compat/cblas/src/cblas_dgbmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -14,12 +10,12 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_dgbmv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const int KL, const int KU, - const double alpha, const double *A, const int lda, - const double *X, const int incX, const double beta, - double *Y, const int incY) +void cblas_dgbmv(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, + f77_int KL, f77_int KU, + double alpha, const double *A, f77_int lda, + const double *X, f77_int incX, double beta, + double *Y, f77_int incY) { char TA; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_dgemm.c b/frame/compat/cblas/src/cblas_dgemm.c index 51e60f370..479a15abd 100644 --- a/frame/compat/cblas/src/cblas_dgemm.c +++ b/frame/compat/cblas/src/cblas_dgemm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_dgemm(const enum CBLAS_ORDER Order, 
const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_TRANSPOSE TransB, const int M, const int N, - const int K, const double alpha, const double *A, - const int lda, const double *B, const int ldb, - const double beta, double *C, const int ldc) +void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N, + f77_int K, double alpha, const double *A, + f77_int lda, const double *B, f77_int ldb, + double beta, double *C, f77_int ldc) { char TA, TB; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_dgemv.c b/frame/compat/cblas/src/cblas_dgemv.c index efc7ae875..025523b2c 100644 --- a/frame/compat/cblas/src/cblas_dgemv.c +++ b/frame/compat/cblas/src/cblas_dgemv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -14,11 +10,11 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_dgemv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const double alpha, const double *A, const int lda, - const double *X, const int incX, const double beta, - double *Y, const int incY) +void cblas_dgemv(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, + double alpha, const double *A, f77_int lda, + const double *X, f77_int incX, double beta, + double *Y, f77_int incY) { char TA; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_dger.c b/frame/compat/cblas/src/cblas_dger.c index c661ac43a..366d393b6 100644 --- a/frame/compat/cblas/src/cblas_dger.c +++ b/frame/compat/cblas/src/cblas_dger.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,9 +11,9 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_dger(const enum 
CBLAS_ORDER order, const int M, const int N, - const double alpha, const double *X, const int incX, - const double *Y, const int incY, double *A, const int lda) +void cblas_dger(enum CBLAS_ORDER order, f77_int M, f77_int N, + double alpha, const double *X, f77_int incX, + const double *Y, f77_int incY, double *A, f77_int lda) { #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; diff --git a/frame/compat/cblas/src/cblas_dnrm2.c b/frame/compat/cblas/src/cblas_dnrm2.c index 7ccf71699..537ab8835 100644 --- a/frame/compat/cblas/src/cblas_dnrm2.c +++ b/frame/compat/cblas/src/cblas_dnrm2.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dnrm2.c @@ -15,7 +11,7 @@ */ #include "cblas.h" #include "cblas_f77.h" -double cblas_dnrm2( const int N, const double *X, const int incX) +double cblas_dnrm2( f77_int N, const double *X, f77_int incX) { double nrm2; #ifdef F77_INT diff --git a/frame/compat/cblas/src/cblas_drot.c b/frame/compat/cblas/src/cblas_drot.c index 200691a7a..b5dac7cad 100644 --- a/frame/compat/cblas/src/cblas_drot.c +++ b/frame/compat/cblas/src/cblas_drot.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_drot.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_drot(const int N, double *X, const int incX, - double *Y, const int incY, const double c, const double s) +void cblas_drot(f77_int N, double *X, f77_int incX, + double *Y, f77_int incY, const double c, const double s) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; diff --git a/frame/compat/cblas/src/cblas_drotg.c b/frame/compat/cblas/src/cblas_drotg.c index 18d0d348c..c509bafc7 100644 --- 
a/frame/compat/cblas/src/cblas_drotg.c +++ b/frame/compat/cblas/src/cblas_drotg.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_drotg.c diff --git a/frame/compat/cblas/src/cblas_drotm.c b/frame/compat/cblas/src/cblas_drotm.c index d7aca5fee..f0ae348dd 100644 --- a/frame/compat/cblas/src/cblas_drotm.c +++ b/frame/compat/cblas/src/cblas_drotm.c @@ -1,13 +1,9 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS #include "cblas.h" #include "cblas_f77.h" -void cblas_drotm( const int N, double *X, const int incX, double *Y, - const int incY, const double *P) +void cblas_drotm( f77_int N, double *X, f77_int incX, double *Y, + f77_int incY, const double *P) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; diff --git a/frame/compat/cblas/src/cblas_drotmg.c b/frame/compat/cblas/src/cblas_drotmg.c index cbf2738db..6b969bf87 100644 --- a/frame/compat/cblas/src/cblas_drotmg.c +++ b/frame/compat/cblas/src/cblas_drotmg.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_drotmg.c diff --git a/frame/compat/cblas/src/cblas_dsbmv.c b/frame/compat/cblas/src/cblas_dsbmv.c index ac0177c11..8398dd6ce 100644 --- a/frame/compat/cblas/src/cblas_dsbmv.c +++ b/frame/compat/cblas/src/cblas_dsbmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_dsbmv(const enum CBLAS_ORDER order, - const enum CBLAS_UPLO Uplo, 
const int N, const int K, - const double alpha, const double *A, const int lda, - const double *X, const int incX, const double beta, - double *Y, const int incY) +void cblas_dsbmv(enum CBLAS_ORDER order, + enum CBLAS_UPLO Uplo, f77_int N, f77_int K, + double alpha, const double *A, f77_int lda, + const double *X, f77_int incX, double beta, + double *Y, f77_int incY) { char UL; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_dscal.c b/frame/compat/cblas/src/cblas_dscal.c index 53b4d3567..bb6da3f67 100644 --- a/frame/compat/cblas/src/cblas_dscal.c +++ b/frame/compat/cblas/src/cblas_dscal.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dscal.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_dscal( const int N, const double alpha, double *X, - const int incX) +void cblas_dscal( f77_int N, double alpha, double *X, + f77_int incX) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; diff --git a/frame/compat/cblas/src/cblas_dsdot.c b/frame/compat/cblas/src/cblas_dsdot.c index 7e37cf2f1..734a16dae 100644 --- a/frame/compat/cblas/src/cblas_dsdot.c +++ b/frame/compat/cblas/src/cblas_dsdot.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dsdot.c @@ -15,8 +11,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -double cblas_dsdot( const int N, const float *X, - const int incX, const float *Y, const int incY) +double cblas_dsdot( f77_int N, const float *X, + f77_int incX, const float *Y, f77_int incY) { double dot; #ifdef F77_INT diff --git a/frame/compat/cblas/src/cblas_dspmv.c b/frame/compat/cblas/src/cblas_dspmv.c index 79504b69b..b8e95cfa8 100644 --- a/frame/compat/cblas/src/cblas_dspmv.c +++ 
b/frame/compat/cblas/src/cblas_dspmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -16,11 +12,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_dspmv(const enum CBLAS_ORDER order, - const enum CBLAS_UPLO Uplo, const int N, - const double alpha, const double *AP, - const double *X, const int incX, const double beta, - double *Y, const int incY) +void cblas_dspmv(enum CBLAS_ORDER order, + enum CBLAS_UPLO Uplo, f77_int N, + double alpha, const double *AP, + const double *X, f77_int incX, double beta, + double *Y, f77_int incY) { char UL; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_dspr.c b/frame/compat/cblas/src/cblas_dspr.c index 5e3f54cf7..9156f7a83 100644 --- a/frame/compat/cblas/src/cblas_dspr.c +++ b/frame/compat/cblas/src/cblas_dspr.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,9 +11,9 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_dspr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const double *X, - const int incX, double *Ap) +void cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, double alpha, const double *X, + f77_int incX, double *Ap) { char UL; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_dspr2.c b/frame/compat/cblas/src/cblas_dspr2.c index 86bbe1359..8fade8319 100644 --- a/frame/compat/cblas/src/cblas_dspr2.c +++ b/frame/compat/cblas/src/cblas_dspr2.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dspr2.c @@ -13,9 +9,9 @@ */ #include "cblas.h" #include 
"cblas_f77.h" -void cblas_dspr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const double *X, - const int incX, const double *Y, const int incY, double *A) +void cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, const double alpha, const double *X, + f77_int incX, const double *Y, f77_int incY, double *A) { char UL; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_dswap.c b/frame/compat/cblas/src/cblas_dswap.c index 3a26e968f..e204baea0 100644 --- a/frame/compat/cblas/src/cblas_dswap.c +++ b/frame/compat/cblas/src/cblas_dswap.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dswap.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_dswap( const int N, double *X, const int incX, double *Y, - const int incY) +void cblas_dswap( f77_int N, double *X, f77_int incX, double *Y, + f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; diff --git a/frame/compat/cblas/src/cblas_dsymm.c b/frame/compat/cblas/src/cblas_dsymm.c index b073c0106..81bc52268 100644 --- a/frame/compat/cblas/src/cblas_dsymm.c +++ b/frame/compat/cblas/src/cblas_dsymm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const double alpha, const double *A, const int lda, - const double *B, const int ldb, const double beta, - double *C, const int ldc) +void cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, f77_int M, f77_int N, + double alpha, const 
double *A, f77_int lda, + const double *B, f77_int ldb, double beta, + double *C, f77_int ldc) { char SD, UL; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_dsymv.c b/frame/compat/cblas/src/cblas_dsymv.c index 42ce6a3c3..d0c2aa642 100644 --- a/frame/compat/cblas/src/cblas_dsymv.c +++ b/frame/compat/cblas/src/cblas_dsymv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_dsymv(const enum CBLAS_ORDER order, - const enum CBLAS_UPLO Uplo, const int N, - const double alpha, const double *A, const int lda, - const double *X, const int incX, const double beta, - double *Y, const int incY) +void cblas_dsymv(enum CBLAS_ORDER order, + enum CBLAS_UPLO Uplo, f77_int N, + double alpha, const double *A, f77_int lda, + const double *X, f77_int incX, double beta, + double *Y, f77_int incY) { char UL; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_dsyr.c b/frame/compat/cblas/src/cblas_dsyr.c index 6e9c6ff35..f6dd66c75 100644 --- a/frame/compat/cblas/src/cblas_dsyr.c +++ b/frame/compat/cblas/src/cblas_dsyr.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,9 +11,9 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_dsyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const double *X, - const int incX, double *A, const int lda) +void cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, const double alpha, const double *X, + f77_int incX, double *A, f77_int lda) { char UL; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_dsyr2.c b/frame/compat/cblas/src/cblas_dsyr2.c index d2bf47c06..c0d773939 100644 --- 
a/frame/compat/cblas/src/cblas_dsyr2.c +++ b/frame/compat/cblas/src/cblas_dsyr2.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,10 +11,10 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_dsyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const double *X, - const int incX, const double *Y, const int incY, double *A, - const int lda) +void cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, const double alpha, const double *X, + f77_int incX, const double *Y, f77_int incY, double *A, + f77_int lda) { char UL; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_dsyr2k.c b/frame/compat/cblas/src/cblas_dsyr2k.c index 13ee196d8..2c35b9929 100644 --- a/frame/compat/cblas/src/cblas_dsyr2k.c +++ b/frame/compat/cblas/src/cblas_dsyr2k.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const double alpha, const double *A, const int lda, - const double *B, const int ldb, const double beta, - double *C, const int ldc) +void cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + double alpha, const double *A, f77_int lda, + const double *B, f77_int ldb, double beta, + double *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_dsyrk.c b/frame/compat/cblas/src/cblas_dsyrk.c index aced9af74..8a7ca1651 100644 --- a/frame/compat/cblas/src/cblas_dsyrk.c +++ b/frame/compat/cblas/src/cblas_dsyrk.c @@ 
-1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,10 +11,10 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const double alpha, const double *A, const int lda, - const double beta, double *C, const int ldc) +void cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + double alpha, const double *A, f77_int lda, + double beta, double *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_dtbmv.c b/frame/compat/cblas/src/cblas_dtbmv.c index 54698b59c..6502c1c22 100644 --- a/frame/compat/cblas/src/cblas_dtbmv.c +++ b/frame/compat/cblas/src/cblas_dtbmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dtbmv.c @@ -13,10 +9,10 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_dtbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const double *A, const int lda, - double *X, const int incX) +void cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, f77_int K, const double *A, f77_int lda, + double *X, f77_int incX) { char TA; char UL; diff --git a/frame/compat/cblas/src/cblas_dtbsv.c b/frame/compat/cblas/src/cblas_dtbsv.c index f5e4570fc..aae08e956 100644 --- a/frame/compat/cblas/src/cblas_dtbsv.c +++ b/frame/compat/cblas/src/cblas_dtbsv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include 
"bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dtbsv.c @@ -13,10 +9,10 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_dtbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const double *A, const int lda, - double *X, const int incX) +void cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, f77_int K, const double *A, f77_int lda, + double *X, f77_int incX) { char TA; char UL; diff --git a/frame/compat/cblas/src/cblas_dtpmv.c b/frame/compat/cblas/src/cblas_dtpmv.c index 184a54565..b72b4ed7e 100644 --- a/frame/compat/cblas/src/cblas_dtpmv.c +++ b/frame/compat/cblas/src/cblas_dtpmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dtpmv.c @@ -13,9 +9,9 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_dtpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const double *Ap, double *X, const int incX) +void cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const double *Ap, double *X, f77_int incX) { char TA; char UL; diff --git a/frame/compat/cblas/src/cblas_dtpsv.c b/frame/compat/cblas/src/cblas_dtpsv.c index 893736436..501e4a256 100644 --- a/frame/compat/cblas/src/cblas_dtpsv.c +++ b/frame/compat/cblas/src/cblas_dtpsv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dtpsv.c @@ -13,9 +9,9 @@ */ #include "cblas.h" #include "cblas_f77.h" -void 
cblas_dtpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const double *Ap, double *X, const int incX) +void cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const double *Ap, double *X, f77_int incX) { char TA; char UL; diff --git a/frame/compat/cblas/src/cblas_dtrmm.c b/frame/compat/cblas/src/cblas_dtrmm.c index 7b292b112..c93c4862a 100644 --- a/frame/compat/cblas/src/cblas_dtrmm.c +++ b/frame/compat/cblas/src/cblas_dtrmm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const double alpha, const double *A, const int lda, - double *B, const int ldb) +void cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + enum CBLAS_DIAG Diag, f77_int M, f77_int N, + double alpha, const double *A, f77_int lda, + double *B, f77_int ldb) { char UL, TA, SD, DI; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_dtrmv.c b/frame/compat/cblas/src/cblas_dtrmv.c index 505cb0d52..904d4b75b 100644 --- a/frame/compat/cblas/src/cblas_dtrmv.c +++ b/frame/compat/cblas/src/cblas_dtrmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,10 +11,10 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_dtrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE 
TransA, const enum CBLAS_DIAG Diag, - const int N, const double *A, const int lda, - double *X, const int incX) +void cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const double *A, f77_int lda, + double *X, f77_int incX) { char TA; diff --git a/frame/compat/cblas/src/cblas_dtrsm.c b/frame/compat/cblas/src/cblas_dtrsm.c index cfa38d5e8..552620495 100644 --- a/frame/compat/cblas/src/cblas_dtrsm.c +++ b/frame/compat/cblas/src/cblas_dtrsm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const double alpha, const double *A, const int lda, - double *B, const int ldb) +void cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_DIAG Diag, f77_int M, f77_int N, + double alpha, const double *A, f77_int lda, + double *B, f77_int ldb) { char UL, TA, SD, DI; diff --git a/frame/compat/cblas/src/cblas_dtrsv.c b/frame/compat/cblas/src/cblas_dtrsv.c index 5a747584b..4d257f7ea 100644 --- a/frame/compat/cblas/src/cblas_dtrsv.c +++ b/frame/compat/cblas/src/cblas_dtrsv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dtrsv.c @@ -13,10 +9,10 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_dtrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const double *A, const int lda, 
double *X, - const int incX) +void cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const double *A, f77_int lda, double *X, + f77_int incX) { char TA; diff --git a/frame/compat/cblas/src/cblas_dzasum.c b/frame/compat/cblas/src/cblas_dzasum.c index 8fe779c1d..325a54a19 100644 --- a/frame/compat/cblas/src/cblas_dzasum.c +++ b/frame/compat/cblas/src/cblas_dzasum.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dzasum.c @@ -15,7 +11,7 @@ */ #include "cblas.h" #include "cblas_f77.h" -double cblas_dzasum( const int N, const void *X, const int incX) +double cblas_dzasum( f77_int N, const void *X, f77_int incX) { double asum; #ifdef F77_INT diff --git a/frame/compat/cblas/src/cblas_dznrm2.c b/frame/compat/cblas/src/cblas_dznrm2.c index ab9495214..0df1ef29e 100644 --- a/frame/compat/cblas/src/cblas_dznrm2.c +++ b/frame/compat/cblas/src/cblas_dznrm2.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_dznrm2.c @@ -15,7 +11,7 @@ */ #include "cblas.h" #include "cblas_f77.h" -double cblas_dznrm2( const int N, const void *X, const int incX) +double cblas_dznrm2( f77_int N, const void *X, f77_int incX) { double nrm2; #ifdef F77_INT diff --git a/frame/compat/cblas/src/cblas_f77.h b/frame/compat/cblas/src/cblas_f77.h index 18435cd30..fcdd946df 100644 --- a/frame/compat/cblas/src/cblas_f77.h +++ b/frame/compat/cblas/src/cblas_f77.h @@ -4,698 +4,164 @@ * * Updated by Jeff Horner * Merged cblas_f77.h and cblas_fortran_header.h + * + * (Heavily hacked down from the original) */ #ifndef CBLAS_F77_H -#define CBLAS_f77_H +#define CBLAS_F77_H -#ifdef CRAY - #include - #define F77_CHAR _fcd - #define 
C2F_CHAR(a) ( _cptofcd( (a), 1 ) ) - #define C2F_STR(a, i) ( _cptofcd( (a), (i) ) ) - #define F77_STRLEN(a) (_fcdlen) -#endif - -#ifdef WeirdNEC - #define F77_INT long -#endif - -#ifdef F77_CHAR - #define FCHAR F77_CHAR -#else - #define FCHAR char * -#endif - -#ifdef F77_INT - #define FINT const F77_INT * - #define FINT2 F77_INT * -#else - #define FINT const int * - #define FINT2 int * -#endif - -#if defined(ADD_) /* * Level 1 BLAS */ -#define F77_xerbla xerbla_ - #define F77_srotg srotg_ - #define F77_srotmg srotmg_ - #define F77_srot srot_ - #define F77_srotm srotm_ - #define F77_drotg drotg_ - #define F77_drotmg drotmg_ - #define F77_drot drot_ - #define F77_drotm drotm_ - #define F77_sswap sswap_ - #define F77_scopy scopy_ - #define F77_saxpy saxpy_ - #define F77_isamax_sub isamaxsub_ - #define F77_dswap dswap_ - #define F77_dcopy dcopy_ - #define F77_daxpy daxpy_ - #define F77_idamax_sub idamaxsub_ - #define F77_cswap cswap_ - #define F77_ccopy ccopy_ - #define F77_caxpy caxpy_ - #define F77_icamax_sub icamaxsub_ - #define F77_zswap zswap_ - #define F77_zcopy zcopy_ - #define F77_zaxpy zaxpy_ - #define F77_izamax_sub izamaxsub_ - #define F77_sdot_sub sdotsub_ - #define F77_ddot_sub ddotsub_ - #define F77_dsdot_sub dsdotsub_ - #define F77_sscal sscal_ - #define F77_dscal dscal_ - #define F77_cscal cscal_ - #define F77_zscal zscal_ - #define F77_csscal csscal_ - #define F77_zdscal zdscal_ - #define F77_cdotu_sub cdotusub_ - #define F77_cdotc_sub cdotcsub_ - #define F77_zdotu_sub zdotusub_ - #define F77_zdotc_sub zdotcsub_ - #define F77_snrm2_sub snrm2sub_ - #define F77_sasum_sub sasumsub_ - #define F77_dnrm2_sub dnrm2sub_ - #define F77_dasum_sub dasumsub_ - #define F77_scnrm2_sub scnrm2sub_ - #define F77_scasum_sub scasumsub_ - #define F77_dznrm2_sub dznrm2sub_ - #define F77_dzasum_sub dzasumsub_ - #define F77_sdsdot_sub sdsdotsub_ +#define F77_xerbla xerbla_ +#define F77_srotg srotg_ +#define F77_srotmg srotmg_ +#define F77_srot srot_ +#define F77_srotm srotm_ 
+#define F77_drotg drotg_ +#define F77_drotmg drotmg_ +#define F77_drot drot_ +#define F77_drotm drotm_ +#define F77_sswap sswap_ +#define F77_scopy scopy_ +#define F77_saxpy saxpy_ +#define F77_isamax_sub isamaxsub_ +#define F77_dswap dswap_ +#define F77_dcopy dcopy_ +#define F77_daxpy daxpy_ +#define F77_idamax_sub idamaxsub_ +#define F77_cswap cswap_ +#define F77_ccopy ccopy_ +#define F77_caxpy caxpy_ +#define F77_icamax_sub icamaxsub_ +#define F77_zswap zswap_ +#define F77_zcopy zcopy_ +#define F77_zaxpy zaxpy_ +#define F77_izamax_sub izamaxsub_ +#define F77_sdot_sub sdotsub_ +#define F77_ddot_sub ddotsub_ +#define F77_dsdot_sub dsdotsub_ +#define F77_sscal sscal_ +#define F77_dscal dscal_ +#define F77_cscal cscal_ +#define F77_zscal zscal_ +#define F77_csscal csscal_ +#define F77_zdscal zdscal_ +#define F77_cdotu_sub cdotusub_ +#define F77_cdotc_sub cdotcsub_ +#define F77_zdotu_sub zdotusub_ +#define F77_zdotc_sub zdotcsub_ +#define F77_snrm2_sub snrm2sub_ +#define F77_sasum_sub sasumsub_ +#define F77_dnrm2_sub dnrm2sub_ +#define F77_dasum_sub dasumsub_ +#define F77_scnrm2_sub scnrm2sub_ +#define F77_scasum_sub scasumsub_ +#define F77_dznrm2_sub dznrm2sub_ +#define F77_dzasum_sub dzasumsub_ +#define F77_sdsdot_sub sdsdotsub_ /* - * Level 2 BLAS - */ - #define F77_ssymv ssymv_ - #define F77_ssbmv ssbmv_ - #define F77_sspmv sspmv_ - #define F77_sger sger_ - #define F77_ssyr ssyr_ - #define F77_sspr sspr_ - #define F77_ssyr2 ssyr2_ - #define F77_sspr2 sspr2_ - #define F77_dsymv dsymv_ - #define F77_dsbmv dsbmv_ - #define F77_dspmv dspmv_ - #define F77_dger dger_ - #define F77_dsyr dsyr_ - #define F77_dspr dspr_ - #define F77_dsyr2 dsyr2_ - #define F77_dspr2 dspr2_ - #define F77_chemv chemv_ - #define F77_chbmv chbmv_ - #define F77_chpmv chpmv_ - #define F77_cgeru cgeru_ - #define F77_cgerc cgerc_ - #define F77_cher cher_ - #define F77_chpr chpr_ - #define F77_cher2 cher2_ - #define F77_chpr2 chpr2_ - #define F77_zhemv zhemv_ - #define F77_zhbmv zhbmv_ - #define 
F77_zhpmv zhpmv_ - #define F77_zgeru zgeru_ - #define F77_zgerc zgerc_ - #define F77_zher zher_ - #define F77_zhpr zhpr_ - #define F77_zher2 zher2_ - #define F77_zhpr2 zhpr2_ - #define F77_sgemv sgemv_ - #define F77_sgbmv sgbmv_ - #define F77_strmv strmv_ - #define F77_stbmv stbmv_ - #define F77_stpmv stpmv_ - #define F77_strsv strsv_ - #define F77_stbsv stbsv_ - #define F77_stpsv stpsv_ - #define F77_dgemv dgemv_ - #define F77_dgbmv dgbmv_ - #define F77_dtrmv dtrmv_ - #define F77_dtbmv dtbmv_ - #define F77_dtpmv dtpmv_ - #define F77_dtrsv dtrsv_ - #define F77_dtbsv dtbsv_ - #define F77_dtpsv dtpsv_ - #define F77_cgemv cgemv_ - #define F77_cgbmv cgbmv_ - #define F77_ctrmv ctrmv_ - #define F77_ctbmv ctbmv_ - #define F77_ctpmv ctpmv_ - #define F77_ctrsv ctrsv_ - #define F77_ctbsv ctbsv_ - #define F77_ctpsv ctpsv_ - #define F77_zgemv zgemv_ - #define F77_zgbmv zgbmv_ - #define F77_ztrmv ztrmv_ - #define F77_ztbmv ztbmv_ - #define F77_ztpmv ztpmv_ - #define F77_ztrsv ztrsv_ - #define F77_ztbsv ztbsv_ - #define F77_ztpsv ztpsv_ +* Level 2 BLAS +*/ +#define F77_ssymv ssymv_ +#define F77_ssbmv ssbmv_ +#define F77_sspmv sspmv_ +#define F77_sger sger_ +#define F77_ssyr ssyr_ +#define F77_sspr sspr_ +#define F77_ssyr2 ssyr2_ +#define F77_sspr2 sspr2_ +#define F77_dsymv dsymv_ +#define F77_dsbmv dsbmv_ +#define F77_dspmv dspmv_ +#define F77_dger dger_ +#define F77_dsyr dsyr_ +#define F77_dspr dspr_ +#define F77_dsyr2 dsyr2_ +#define F77_dspr2 dspr2_ +#define F77_chemv chemv_ +#define F77_chbmv chbmv_ +#define F77_chpmv chpmv_ +#define F77_cgeru cgeru_ +#define F77_cgerc cgerc_ +#define F77_cher cher_ +#define F77_chpr chpr_ +#define F77_cher2 cher2_ +#define F77_chpr2 chpr2_ +#define F77_zhemv zhemv_ +#define F77_zhbmv zhbmv_ +#define F77_zhpmv zhpmv_ +#define F77_zgeru zgeru_ +#define F77_zgerc zgerc_ +#define F77_zher zher_ +#define F77_zhpr zhpr_ +#define F77_zher2 zher2_ +#define F77_zhpr2 zhpr2_ +#define F77_sgemv sgemv_ +#define F77_sgbmv sgbmv_ +#define F77_strmv 
strmv_ +#define F77_stbmv stbmv_ +#define F77_stpmv stpmv_ +#define F77_strsv strsv_ +#define F77_stbsv stbsv_ +#define F77_stpsv stpsv_ +#define F77_dgemv dgemv_ +#define F77_dgbmv dgbmv_ +#define F77_dtrmv dtrmv_ +#define F77_dtbmv dtbmv_ +#define F77_dtpmv dtpmv_ +#define F77_dtrsv dtrsv_ +#define F77_dtbsv dtbsv_ +#define F77_dtpsv dtpsv_ +#define F77_cgemv cgemv_ +#define F77_cgbmv cgbmv_ +#define F77_ctrmv ctrmv_ +#define F77_ctbmv ctbmv_ +#define F77_ctpmv ctpmv_ +#define F77_ctrsv ctrsv_ +#define F77_ctbsv ctbsv_ +#define F77_ctpsv ctpsv_ +#define F77_zgemv zgemv_ +#define F77_zgbmv zgbmv_ +#define F77_ztrmv ztrmv_ +#define F77_ztbmv ztbmv_ +#define F77_ztpmv ztpmv_ +#define F77_ztrsv ztrsv_ +#define F77_ztbsv ztbsv_ +#define F77_ztpsv ztpsv_ /* - * Level 3 BLAS - */ - #define F77_chemm chemm_ - #define F77_cherk cherk_ - #define F77_cher2k cher2k_ - #define F77_zhemm zhemm_ - #define F77_zherk zherk_ - #define F77_zher2k zher2k_ - #define F77_sgemm sgemm_ - #define F77_ssymm ssymm_ - #define F77_ssyrk ssyrk_ - #define F77_ssyr2k ssyr2k_ - #define F77_strmm strmm_ - #define F77_strsm strsm_ - #define F77_dgemm dgemm_ - #define F77_dsymm dsymm_ - #define F77_dsyrk dsyrk_ - #define F77_dsyr2k dsyr2k_ - #define F77_dtrmm dtrmm_ - #define F77_dtrsm dtrsm_ - #define F77_cgemm cgemm_ - #define F77_csymm csymm_ - #define F77_csyrk csyrk_ - #define F77_csyr2k csyr2k_ - #define F77_ctrmm ctrmm_ - #define F77_ctrsm ctrsm_ - #define F77_zgemm zgemm_ - #define F77_zsymm zsymm_ - #define F77_zsyrk zsyrk_ - #define F77_zsyr2k zsyr2k_ - #define F77_ztrmm ztrmm_ - #define F77_ztrsm ztrsm_ -#elif defined(UPCASE) -/* - * Level 1 BLAS - */ -#define F77_xerbla XERBLA - #define F77_srotg SROTG - #define F77_srotmg SROTMG - #define F77_srot SROT - #define F77_srotm SROTM - #define F77_drotg DROTG - #define F77_drotmg DROTMG - #define F77_drot DROT - #define F77_drotm DROTM - #define F77_sswap SSWAP - #define F77_scopy SCOPY - #define F77_saxpy SAXPY - #define F77_isamax_sub 
ISAMAXSUB - #define F77_dswap DSWAP - #define F77_dcopy DCOPY - #define F77_daxpy DAXPY - #define F77_idamax_sub IDAMAXSUB - #define F77_cswap CSWAP - #define F77_ccopy CCOPY - #define F77_caxpy CAXPY - #define F77_icamax_sub ICAMAXSUB - #define F77_zswap ZSWAP - #define F77_zcopy ZCOPY - #define F77_zaxpy ZAXPY - #define F77_izamax_sub IZAMAXSUB - #define F77_sdot_sub SDOTSUB - #define F77_ddot_sub DDOTSUB - #define F77_dsdot_sub DSDOTSUB - #define F77_sscal SSCAL - #define F77_dscal DSCAL - #define F77_cscal CSCAL - #define F77_zscal ZSCAL - #define F77_csscal CSSCAL - #define F77_zdscal ZDSCAL - #define F77_cdotu_sub CDOTUSUB - #define F77_cdotc_sub CDOTCSUB - #define F77_zdotu_sub ZDOTUSUB - #define F77_zdotc_sub ZDOTCSUB - #define F77_snrm2_sub SNRM2SUB - #define F77_sasum_sub SASUMSUB - #define F77_dnrm2_sub DNRM2SUB - #define F77_dasum_sub DASUMSUB - #define F77_scnrm2_sub SCNRM2SUB - #define F77_scasum_sub SCASUMSUB - #define F77_dznrm2_sub DZNRM2SUB - #define F77_dzasum_sub DZASUMSUB - #define F77_sdsdot_sub SDSDOTSUB -/* - * Level 2 BLAS - */ - #define F77_ssymv SSYMV - #define F77_ssbmv SSBMV - #define F77_sspmv SSPMV - #define F77_sger SGER - #define F77_ssyr SSYR - #define F77_sspr SSPR - #define F77_ssyr2 SSYR2 - #define F77_sspr2 SSPR2 - #define F77_dsymv DSYMV - #define F77_dsbmv DSBMV - #define F77_dspmv DSPMV - #define F77_dger DGER - #define F77_dsyr DSYR - #define F77_dspr DSPR - #define F77_dsyr2 DSYR2 - #define F77_dspr2 DSPR2 - #define F77_chemv CHEMV - #define F77_chbmv CHBMV - #define F77_chpmv CHPMV - #define F77_cgeru CGERU - #define F77_cgerc CGERC - #define F77_cher CHER - #define F77_chpr CHPR - #define F77_cher2 CHER2 - #define F77_chpr2 CHPR2 - #define F77_zhemv ZHEMV - #define F77_zhbmv ZHBMV - #define F77_zhpmv ZHPMV - #define F77_zgeru ZGERU - #define F77_zgerc ZGERC - #define F77_zher ZHER - #define F77_zhpr ZHPR - #define F77_zher2 ZHER2 - #define F77_zhpr2 ZHPR2 - #define F77_sgemv SGEMV - #define F77_sgbmv SGBMV - #define 
F77_strmv STRMV - #define F77_stbmv STBMV - #define F77_stpmv STPMV - #define F77_strsv STRSV - #define F77_stbsv STBSV - #define F77_stpsv STPSV - #define F77_dgemv DGEMV - #define F77_dgbmv DGBMV - #define F77_dtrmv DTRMV - #define F77_dtbmv DTBMV - #define F77_dtpmv DTPMV - #define F77_dtrsv DTRSV - #define F77_dtbsv DTBSV - #define F77_dtpsv DTPSV - #define F77_cgemv CGEMV - #define F77_cgbmv CGBMV - #define F77_ctrmv CTRMV - #define F77_ctbmv CTBMV - #define F77_ctpmv CTPMV - #define F77_ctrsv CTRSV - #define F77_ctbsv CTBSV - #define F77_ctpsv CTPSV - #define F77_zgemv ZGEMV - #define F77_zgbmv ZGBMV - #define F77_ztrmv ZTRMV - #define F77_ztbmv ZTBMV - #define F77_ztpmv ZTPMV - #define F77_ztrsv ZTRSV - #define F77_ztbsv ZTBSV - #define F77_ztpsv ZTPSV -/* - * Level 3 BLAS - */ - #define F77_chemm CHEMM - #define F77_cherk CHERK - #define F77_cher2k CHER2K - #define F77_zhemm ZHEMM - #define F77_zherk ZHERK - #define F77_zher2k ZHER2K - #define F77_sgemm SGEMM - #define F77_ssymm SSYMM - #define F77_ssyrk SSYRK - #define F77_ssyr2k SSYR2K - #define F77_strmm STRMM - #define F77_strsm STRSM - #define F77_dgemm DGEMM - #define F77_dsymm DSYMM - #define F77_dsyrk DSYRK - #define F77_dsyr2k DSYR2K - #define F77_dtrmm DTRMM - #define F77_dtrsm DTRSM - #define F77_cgemm CGEMM - #define F77_csymm CSYMM - #define F77_csyrk CSYRK - #define F77_csyr2k CSYR2K - #define F77_ctrmm CTRMM - #define F77_ctrsm CTRSM - #define F77_zgemm ZGEMM - #define F77_zsymm ZSYMM - #define F77_zsyrk ZSYRK - #define F77_zsyr2k ZSYR2K - #define F77_ztrmm ZTRMM - #define F77_ztrsm ZTRSM -#elif defined(NOCHANGE) -/* - * Level 1 BLAS - */ -#define F77_xerbla xerbla - #define F77_srotg srotg - #define F77_srotmg srotmg - #define F77_srot srot - #define F77_srotm srotm - #define F77_drotg drotg - #define F77_drotmg drotmg - #define F77_drot drot - #define F77_drotm drotm - #define F77_sswap sswap - #define F77_scopy scopy - #define F77_saxpy saxpy - #define F77_isamax_sub isamaxsub - #define 
F77_dswap dswap - #define F77_dcopy dcopy - #define F77_daxpy daxpy - #define F77_idamax_sub idamaxsub - #define F77_cswap cswap - #define F77_ccopy ccopy - #define F77_caxpy caxpy - #define F77_icamax_sub icamaxsub - #define F77_zswap zswap - #define F77_zcopy zcopy - #define F77_zaxpy zaxpy - #define F77_izamax_sub izamaxsub - #define F77_sdot_sub sdotsub - #define F77_ddot_sub ddotsub - #define F77_dsdot_sub dsdotsub - #define F77_sscal sscal - #define F77_dscal dscal - #define F77_cscal cscal - #define F77_zscal zscal - #define F77_csscal csscal - #define F77_zdscal zdscal - #define F77_cdotu_sub cdotusub - #define F77_cdotc_sub cdotcsub - #define F77_zdotu_sub zdotusub - #define F77_zdotc_sub zdotcsub - #define F77_snrm2_sub snrm2sub - #define F77_sasum_sub sasumsub - #define F77_dnrm2_sub dnrm2sub - #define F77_dasum_sub dasumsub - #define F77_scnrm2_sub scnrm2sub - #define F77_scasum_sub scasumsub - #define F77_dznrm2_sub dznrm2sub - #define F77_dzasum_sub dzasumsub - #define F77_sdsdot_sub sdsdotsub -/* - * Level 2 BLAS - */ - #define F77_ssymv ssymv - #define F77_ssbmv ssbmv - #define F77_sspmv sspmv - #define F77_sger sger - #define F77_ssyr ssyr - #define F77_sspr sspr - #define F77_ssyr2 ssyr2 - #define F77_sspr2 sspr2 - #define F77_dsymv dsymv - #define F77_dsbmv dsbmv - #define F77_dspmv dspmv - #define F77_dger dger - #define F77_dsyr dsyr - #define F77_dspr dspr - #define F77_dsyr2 dsyr2 - #define F77_dspr2 dspr2 - #define F77_chemv chemv - #define F77_chbmv chbmv - #define F77_chpmv chpmv - #define F77_cgeru cgeru - #define F77_cgerc cgerc - #define F77_cher cher - #define F77_chpr chpr - #define F77_cher2 cher2 - #define F77_chpr2 chpr2 - #define F77_zhemv zhemv - #define F77_zhbmv zhbmv - #define F77_zhpmv zhpmv - #define F77_zgeru zgeru - #define F77_zgerc zgerc - #define F77_zher zher - #define F77_zhpr zhpr - #define F77_zher2 zher2 - #define F77_zhpr2 zhpr2 - #define F77_sgemv sgemv - #define F77_sgbmv sgbmv - #define F77_strmv strmv - 
#define F77_stbmv stbmv - #define F77_stpmv stpmv - #define F77_strsv strsv - #define F77_stbsv stbsv - #define F77_stpsv stpsv - #define F77_dgemv dgemv - #define F77_dgbmv dgbmv - #define F77_dtrmv dtrmv - #define F77_dtbmv dtbmv - #define F77_dtpmv dtpmv - #define F77_dtrsv dtrsv - #define F77_dtbsv dtbsv - #define F77_dtpsv dtpsv - #define F77_cgemv cgemv - #define F77_cgbmv cgbmv - #define F77_ctrmv ctrmv - #define F77_ctbmv ctbmv - #define F77_ctpmv ctpmv - #define F77_ctrsv ctrsv - #define F77_ctbsv ctbsv - #define F77_ctpsv ctpsv - #define F77_zgemv zgemv - #define F77_zgbmv zgbmv - #define F77_ztrmv ztrmv - #define F77_ztbmv ztbmv - #define F77_ztpmv ztpmv - #define F77_ztrsv ztrsv - #define F77_ztbsv ztbsv - #define F77_ztpsv ztpsv -/* - * Level 3 BLAS - */ - #define F77_chemm chemm - #define F77_cherk cherk - #define F77_cher2k cher2k - #define F77_zhemm zhemm - #define F77_zherk zherk - #define F77_zher2k zher2k - #define F77_sgemm sgemm - #define F77_ssymm ssymm - #define F77_ssyrk ssyrk - #define F77_ssyr2k ssyr2k - #define F77_strmm strmm - #define F77_strsm strsm - #define F77_dgemm dgemm - #define F77_dsymm dsymm - #define F77_dsyrk dsyrk - #define F77_dsyr2k dsyr2k - #define F77_dtrmm dtrmm - #define F77_dtrsm dtrsm - #define F77_cgemm cgemm - #define F77_csymm csymm - #define F77_csyrk csyrk - #define F77_csyr2k csyr2k - #define F77_ctrmm ctrmm - #define F77_ctrsm ctrsm - #define F77_zgemm zgemm - #define F77_zsymm zsymm - #define F77_zsyrk zsyrk - #define F77_zsyr2k zsyr2k - #define F77_ztrmm ztrmm - #define F77_ztrsm ztrsm -#endif - -#ifdef __cplusplus -extern "C" { -#endif - - void F77_xerbla(FCHAR, void *); -/* - * Level 1 Fortran Prototypes - */ - -/* Single Precision */ - - void F77_srot(FINT, float *, FINT, float *, FINT, const float *, const float *); - void F77_srotg(float *,float *,float *,float *); - void F77_srotm( FINT, float *, FINT, float *, FINT, const float *); - void F77_srotmg(float *,float *,float *,const float *, float *); - 
void F77_sswap( FINT, float *, FINT, float *, FINT); - void F77_scopy( FINT, const float *, FINT, float *, FINT); - void F77_saxpy( FINT, const float *, const float *, FINT, float *, FINT); - void F77_sdot_sub(FINT, const float *, FINT, const float *, FINT, float *); - void F77_sdsdot_sub( FINT, const float *, const float *, FINT, const float *, FINT, float *); - void F77_sscal( FINT, const float *, float *, FINT); - void F77_snrm2_sub( FINT, const float *, FINT, float *); - void F77_sasum_sub( FINT, const float *, FINT, float *); - void F77_isamax_sub( FINT, const float * , FINT, FINT2); - -/* Double Precision */ - - void F77_drot(FINT, double *, FINT, double *, FINT, const double *, const double *); - void F77_drotg(double *,double *,double *,double *); - void F77_drotm( FINT, double *, FINT, double *, FINT, const double *); - void F77_drotmg(double *,double *,double *,const double *, double *); - void F77_dswap( FINT, double *, FINT, double *, FINT); - void F77_dcopy( FINT, const double *, FINT, double *, FINT); - void F77_daxpy( FINT, const double *, const double *, FINT, double *, FINT); - void F77_dswap( FINT, double *, FINT, double *, FINT); - void F77_dsdot_sub(FINT, const float *, FINT, const float *, FINT, double *); - void F77_ddot_sub( FINT, const double *, FINT, const double *, FINT, double *); - void F77_dscal( FINT, const double *, double *, FINT); - void F77_dnrm2_sub( FINT, const double *, FINT, double *); - void F77_dasum_sub( FINT, const double *, FINT, double *); - void F77_idamax_sub( FINT, const double * , FINT, FINT2); - -/* Single Complex Precision */ - - void F77_cswap( FINT, void *, FINT, void *, FINT); - void F77_ccopy( FINT, const void *, FINT, void *, FINT); - void F77_caxpy( FINT, const void *, const void *, FINT, void *, FINT); - void F77_cswap( FINT, void *, FINT, void *, FINT); - void F77_cdotc_sub( FINT, const void *, FINT, const void *, FINT, void *); - void F77_cdotu_sub( FINT, const void *, FINT, const void *, FINT, void *); - 
void F77_cscal( FINT, const void *, void *, FINT); - void F77_icamax_sub( FINT, const void *, FINT, FINT2); - void F77_csscal( FINT, const float *, void *, FINT); - void F77_scnrm2_sub( FINT, const void *, FINT, float *); - void F77_scasum_sub( FINT, const void *, FINT, float *); - -/* Double Complex Precision */ - - void F77_zswap( FINT, void *, FINT, void *, FINT); - void F77_zcopy( FINT, const void *, FINT, void *, FINT); - void F77_zaxpy( FINT, const void *, const void *, FINT, void *, FINT); - void F77_zswap( FINT, void *, FINT, void *, FINT); - void F77_zdotc_sub( FINT, const void *, FINT, const void *, FINT, void *); - void F77_zdotu_sub( FINT, const void *, FINT, const void *, FINT, void *); - void F77_zdscal( FINT, const double *, void *, FINT); - void F77_zscal( FINT, const void *, void *, FINT); - void F77_dznrm2_sub( FINT, const void *, FINT, double *); - void F77_dzasum_sub( FINT, const void *, FINT, double *); - void F77_izamax_sub( FINT, const void *, FINT, FINT2); - -/* - * Level 2 Fortran Prototypes - */ - -/* Single Precision */ - - void F77_sgemv(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); - void F77_sgbmv(FCHAR, FINT, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); - void F77_ssymv(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); - void F77_ssbmv(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); - void F77_sspmv(FCHAR, FINT, const float *, const float *, const float *, FINT, const float *, float *, FINT); - void F77_strmv( FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT); - void F77_stbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, FINT, float *, FINT); - void F77_strsv( FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT); - void F77_stbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, 
FINT, float *, FINT); - void F77_stpmv( FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT); - void F77_stpsv( FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT); - void F77_sger( FINT, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT); - void F77_ssyr(FCHAR, FINT, const float *, const float *, FINT, float *, FINT); - void F77_sspr(FCHAR, FINT, const float *, const float *, FINT, float *); - void F77_sspr2(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float *); - void F77_ssyr2(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT); - -/* Double Precision */ - - void F77_dgemv(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); - void F77_dgbmv(FCHAR, FINT, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); - void F77_dsymv(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); - void F77_dsbmv(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); - void F77_dspmv(FCHAR, FINT, const double *, const double *, const double *, FINT, const double *, double *, FINT); - void F77_dtrmv( FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT); - void F77_dtbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT); - void F77_dtrsv( FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT); - void F77_dtbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT); - void F77_dtpmv( FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT); - void F77_dtpsv( FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT); - void F77_dger( FINT, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT); - void F77_dsyr(FCHAR, FINT, const double *, const double *, FINT, double *, FINT); - void 
F77_dspr(FCHAR, FINT, const double *, const double *, FINT, double *); - void F77_dspr2(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double *); - void F77_dsyr2(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT); - -/* Single Complex Precision */ - - void F77_cgemv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT); - void F77_cgbmv(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT); - void F77_chemv(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT); - void F77_chbmv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT); - void F77_chpmv(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT); - void F77_ctrmv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT); - void F77_ctbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT); - void F77_ctpmv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT); - void F77_ctrsv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT); - void F77_ctbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT); - void F77_ctpsv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT); - void F77_cgerc( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); - void F77_cgeru( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); - void F77_cher(FCHAR, FINT, const float *, const void *, FINT, void *, FINT); - void F77_cher2(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); - void F77_chpr(FCHAR, FINT, const float *, const void *, FINT, void *); - void F77_chpr2(FCHAR, FINT, const float *, const void *, FINT, const void *, FINT, void *); - -/* Double Complex Precision */ - - void F77_zgemv(FCHAR, FINT, FINT, 
const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT); - void F77_zgbmv(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT); - void F77_zhemv(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT); - void F77_zhbmv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT); - void F77_zhpmv(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT); - void F77_ztrmv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT); - void F77_ztbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT); - void F77_ztpmv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT); - void F77_ztrsv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT); - void F77_ztbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT); - void F77_ztpsv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT); - void F77_zgerc( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); - void F77_zgeru( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); - void F77_zher(FCHAR, FINT, const double *, const void *, FINT, void *, FINT); - void F77_zher2(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT); - void F77_zhpr(FCHAR, FINT, const double *, const void *, FINT, void *); - void F77_zhpr2(FCHAR, FINT, const double *, const void *, FINT, const void *, FINT, void *); - -/* - * Level 3 Fortran Prototypes - */ - -/* Single Precision */ - - void F77_sgemm(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); - void F77_ssymm(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); - void F77_ssyrk(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, 
const float *, float *, FINT); - void F77_ssyr2k(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); - void F77_strmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT); - void F77_strsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT); - -/* Double Precision */ - - void F77_dgemm(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); - void F77_dsymm(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); - void F77_dsyrk(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT); - void F77_dsyr2k(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); - void F77_dtrmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT); - void F77_dtrsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT); - -/* Single Complex Precision */ - - void F77_cgemm(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); - void F77_csymm(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); - void F77_chemm(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); - void F77_csyrk(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT); - void F77_cherk(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT); - void F77_csyr2k(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); - void F77_cher2k(FCHAR, FCHAR, FINT, FINT, const 
float *, const float *, FINT, const float *, FINT, const float *, float *, FINT); - void F77_ctrmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT); - void F77_ctrsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT); - -/* Double Complex Precision */ - - void F77_zgemm(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); - void F77_zsymm(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); - void F77_zhemm(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); - void F77_zsyrk(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT); - void F77_zherk(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT); - void F77_zsyr2k(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); - void F77_zher2k(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT); - void F77_ztrmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT); - void F77_ztrsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT); - -#ifdef __cplusplus -} -#endif +* Level 3 BLAS +*/ +#define F77_chemm chemm_ +#define F77_cherk cherk_ +#define F77_cher2k cher2k_ +#define F77_zhemm zhemm_ +#define F77_zherk zherk_ +#define F77_zher2k zher2k_ +#define F77_sgemm sgemm_ +#define F77_ssymm ssymm_ +#define F77_ssyrk ssyrk_ +#define F77_ssyr2k ssyr2k_ +#define F77_strmm strmm_ +#define F77_strsm strsm_ +#define F77_dgemm dgemm_ +#define F77_dsymm dsymm_ +#define F77_dsyrk dsyrk_ +#define F77_dsyr2k dsyr2k_ +#define F77_dtrmm dtrmm_ +#define F77_dtrsm 
dtrsm_ +#define F77_cgemm cgemm_ +#define F77_csymm csymm_ +#define F77_csyrk csyrk_ +#define F77_csyr2k csyr2k_ +#define F77_ctrmm ctrmm_ +#define F77_ctrsm ctrsm_ +#define F77_zgemm zgemm_ +#define F77_zsymm zsymm_ +#define F77_zsyrk zsyrk_ +#define F77_zsyr2k zsyr2k_ +#define F77_ztrmm ztrmm_ +#define F77_ztrsm ztrsm_ #endif /* CBLAS_F77_H */ diff --git a/frame/compat/cblas/src/cblas_globals.c b/frame/compat/cblas/src/cblas_globals.c index e0d1afd93..b779d3c3f 100644 --- a/frame/compat/cblas/src/cblas_globals.c +++ b/frame/compat/cblas/src/cblas_globals.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS int CBLAS_CallFromC=0; int RowMajorStrg=0; diff --git a/frame/compat/cblas/src/cblas_icamax.c b/frame/compat/cblas/src/cblas_icamax.c index 5ca481f04..536a321f0 100644 --- a/frame/compat/cblas/src/cblas_icamax.c +++ b/frame/compat/cblas/src/cblas_icamax.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_icamax.c @@ -15,7 +11,7 @@ */ #include "cblas.h" #include "cblas_f77.h" -CBLAS_INDEX cblas_icamax( const int N, const void *X, const int incX) +f77_int cblas_icamax( f77_int N, const void *X, f77_int incX) { f77_int iamax; #ifdef F77_INT @@ -24,7 +20,7 @@ CBLAS_INDEX cblas_icamax( const int N, const void *X, const int incX) #define F77_N N #define F77_incX incX #endif - F77_icamax_sub( &F77_N, X, &F77_incX, &iamax); + F77_icamax_sub( &F77_N, (scomplex*)X, &F77_incX, &iamax); return iamax ? 
iamax-1 : 0; } #endif diff --git a/frame/compat/cblas/src/cblas_idamax.c b/frame/compat/cblas/src/cblas_idamax.c index 982cae781..8769352cf 100644 --- a/frame/compat/cblas/src/cblas_idamax.c +++ b/frame/compat/cblas/src/cblas_idamax.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_idamax.c @@ -15,7 +11,7 @@ */ #include "cblas.h" #include "cblas_f77.h" -CBLAS_INDEX cblas_idamax( const int N, const double *X, const int incX) +f77_int cblas_idamax( f77_int N, const double *X, f77_int incX) { f77_int iamax; #ifdef F77_INT diff --git a/frame/compat/cblas/src/cblas_isamax.c b/frame/compat/cblas/src/cblas_isamax.c index 3edf1bb65..dcc3f1e09 100644 --- a/frame/compat/cblas/src/cblas_isamax.c +++ b/frame/compat/cblas/src/cblas_isamax.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_isamax.c @@ -15,7 +11,7 @@ */ #include "cblas.h" #include "cblas_f77.h" -CBLAS_INDEX cblas_isamax( const int N, const float *X, const int incX) +f77_int cblas_isamax( f77_int N, const float *X, f77_int incX) { f77_int iamax; #ifdef F77_INT diff --git a/frame/compat/cblas/src/cblas_izamax.c b/frame/compat/cblas/src/cblas_izamax.c index 6ec1b28a3..3dcd4c1b8 100644 --- a/frame/compat/cblas/src/cblas_izamax.c +++ b/frame/compat/cblas/src/cblas_izamax.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_izamax.c @@ -15,7 +11,7 @@ */ #include "cblas.h" #include "cblas_f77.h" -CBLAS_INDEX cblas_izamax( const int N, const void *X, const int incX) +f77_int cblas_izamax( f77_int N, const void *X, f77_int incX) { f77_int iamax; #ifdef 
F77_INT @@ -24,7 +20,7 @@ CBLAS_INDEX cblas_izamax( const int N, const void *X, const int incX) #define F77_N N #define F77_incX incX #endif - F77_izamax_sub( &F77_N, X, &F77_incX, &iamax); + F77_izamax_sub( &F77_N, (dcomplex*)X, &F77_incX, &iamax); return (iamax ? iamax-1 : 0); } #endif diff --git a/frame/compat/cblas/src/cblas_sasum.c b/frame/compat/cblas/src/cblas_sasum.c index ddf4689f5..7d0e637b8 100644 --- a/frame/compat/cblas/src/cblas_sasum.c +++ b/frame/compat/cblas/src/cblas_sasum.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_sasum.c @@ -15,7 +11,7 @@ */ #include "cblas.h" #include "cblas_f77.h" -float cblas_sasum( const int N, const float *X, const int incX) +float cblas_sasum( f77_int N, const float *X, f77_int incX) { float asum; #ifdef F77_INT diff --git a/frame/compat/cblas/src/cblas_saxpy.c b/frame/compat/cblas/src/cblas_saxpy.c index 2b0b77375..b7f2b2d4e 100644 --- a/frame/compat/cblas/src/cblas_saxpy.c +++ b/frame/compat/cblas/src/cblas_saxpy.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_saxpy.c @@ -15,8 +11,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_saxpy( const int N, const float alpha, const float *X, - const int incX, float *Y, const int incY) +void cblas_saxpy( f77_int N, float alpha, const float *X, + f77_int incX, float *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; diff --git a/frame/compat/cblas/src/cblas_scasum.c b/frame/compat/cblas/src/cblas_scasum.c index de3b4d6d9..32c958749 100644 --- a/frame/compat/cblas/src/cblas_scasum.c +++ b/frame/compat/cblas/src/cblas_scasum.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include 
"bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_scasum.c @@ -15,7 +11,7 @@ */ #include "cblas.h" #include "cblas_f77.h" -float cblas_scasum( const int N, const void *X, const int incX) +float cblas_scasum( f77_int N, const void *X, f77_int incX) { float asum; #ifdef F77_INT diff --git a/frame/compat/cblas/src/cblas_scnrm2.c b/frame/compat/cblas/src/cblas_scnrm2.c index 012fd63fd..0e24c805c 100644 --- a/frame/compat/cblas/src/cblas_scnrm2.c +++ b/frame/compat/cblas/src/cblas_scnrm2.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_scnrm2.c @@ -15,7 +11,7 @@ */ #include "cblas.h" #include "cblas_f77.h" -float cblas_scnrm2( const int N, const void *X, const int incX) +float cblas_scnrm2( f77_int N, const void *X, f77_int incX) { float nrm2; #ifdef F77_INT diff --git a/frame/compat/cblas/src/cblas_scopy.c b/frame/compat/cblas/src/cblas_scopy.c index 586965561..f54343ff1 100644 --- a/frame/compat/cblas/src/cblas_scopy.c +++ b/frame/compat/cblas/src/cblas_scopy.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_scopy.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_scopy( const int N, const float *X, - const int incX, float *Y, const int incY) +void cblas_scopy( f77_int N, const float *X, + f77_int incX, float *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; diff --git a/frame/compat/cblas/src/cblas_sdot.c b/frame/compat/cblas/src/cblas_sdot.c index e29088317..bbf355887 100644 --- a/frame/compat/cblas/src/cblas_sdot.c +++ b/frame/compat/cblas/src/cblas_sdot.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include 
"bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_sdot.c @@ -15,8 +11,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -float cblas_sdot( const int N, const float *X, - const int incX, const float *Y, const int incY) +float cblas_sdot( f77_int N, const float *X, + f77_int incX, const float *Y, f77_int incY) { float dot; #ifdef F77_INT diff --git a/frame/compat/cblas/src/cblas_sdsdot.c b/frame/compat/cblas/src/cblas_sdsdot.c index f4fb4bf87..bfe8644aa 100644 --- a/frame/compat/cblas/src/cblas_sdsdot.c +++ b/frame/compat/cblas/src/cblas_sdsdot.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_sdsdot.c @@ -15,8 +11,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -float cblas_sdsdot( const int N, const float alpha, const float *X, - const int incX, const float *Y, const int incY) +float cblas_sdsdot( f77_int N, float alpha, const float *X, + f77_int incX, const float *Y, f77_int incY) { float dot; #ifdef F77_INT diff --git a/frame/compat/cblas/src/cblas_sgbmv.c b/frame/compat/cblas/src/cblas_sgbmv.c index ae38667d5..8bf944f9d 100644 --- a/frame/compat/cblas/src/cblas_sgbmv.c +++ b/frame/compat/cblas/src/cblas_sgbmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,12 +11,12 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_sgbmv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const int KL, const int KU, - const float alpha, const float *A, const int lda, - const float *X, const int incX, const float beta, - float *Y, const int incY) +void cblas_sgbmv(enum CBLAS_ORDER order, + enum 
CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, + f77_int KL, f77_int KU, + float alpha, const float *A, f77_int lda, + const float *X, f77_int incX, float beta, + float *Y, f77_int incY) { char TA; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_sgemm.c b/frame/compat/cblas/src/cblas_sgemm.c index 026524aec..89d0f07a8 100644 --- a/frame/compat/cblas/src/cblas_sgemm.c +++ b/frame/compat/cblas/src/cblas_sgemm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_TRANSPOSE TransB, const int M, const int N, - const int K, const float alpha, const float *A, - const int lda, const float *B, const int ldb, - const float beta, float *C, const int ldc) +void cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N, + f77_int K, float alpha, const float *A, + f77_int lda, const float *B, f77_int ldb, + float beta, float *C, f77_int ldc) { char TA, TB; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_sgemv.c b/frame/compat/cblas/src/cblas_sgemv.c index 7ad22bc89..552140fec 100644 --- a/frame/compat/cblas/src/cblas_sgemv.c +++ b/frame/compat/cblas/src/cblas_sgemv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -14,11 +10,11 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_sgemv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const float alpha, const float *A, const int lda, - const float *X, const int incX, const float beta, - float *Y, const int incY) +void cblas_sgemv(enum 
CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, + float alpha, const float *A, f77_int lda, + const float *X, f77_int incX, float beta, + float *Y, f77_int incY) { char TA; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_sger.c b/frame/compat/cblas/src/cblas_sger.c index f13753724..0617bad1e 100644 --- a/frame/compat/cblas/src/cblas_sger.c +++ b/frame/compat/cblas/src/cblas_sger.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,9 +11,9 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_sger(const enum CBLAS_ORDER order, const int M, const int N, - const float alpha, const float *X, const int incX, - const float *Y, const int incY, float *A, const int lda) +void cblas_sger(enum CBLAS_ORDER order, f77_int M, f77_int N, + const float alpha, const float *X, f77_int incX, + const float *Y, f77_int incY, float *A, f77_int lda) { #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; diff --git a/frame/compat/cblas/src/cblas_snrm2.c b/frame/compat/cblas/src/cblas_snrm2.c index cae230cca..7c0fd160a 100644 --- a/frame/compat/cblas/src/cblas_snrm2.c +++ b/frame/compat/cblas/src/cblas_snrm2.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_snrm2.c @@ -15,7 +11,7 @@ */ #include "cblas.h" #include "cblas_f77.h" -float cblas_snrm2( const int N, const float *X, const int incX) +float cblas_snrm2( f77_int N, const float *X, f77_int incX) { float nrm2; #ifdef F77_INT diff --git a/frame/compat/cblas/src/cblas_srot.c b/frame/compat/cblas/src/cblas_srot.c index d5b875803..b9e7ede46 100644 --- a/frame/compat/cblas/src/cblas_srot.c +++ b/frame/compat/cblas/src/cblas_srot.c @@ -1,8 +1,4 @@ -#include 
"bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_srot.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_srot( const int N, float *X, const int incX, float *Y, - const int incY, const float c, const float s) +void cblas_srot( f77_int N, float *X, f77_int incX, float *Y, + f77_int incY, const float c, const float s) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; diff --git a/frame/compat/cblas/src/cblas_srotg.c b/frame/compat/cblas/src/cblas_srotg.c index ad25895ea..165497f7e 100644 --- a/frame/compat/cblas/src/cblas_srotg.c +++ b/frame/compat/cblas/src/cblas_srotg.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_srotg.c diff --git a/frame/compat/cblas/src/cblas_srotm.c b/frame/compat/cblas/src/cblas_srotm.c index 08be377aa..ce417d769 100644 --- a/frame/compat/cblas/src/cblas_srotm.c +++ b/frame/compat/cblas/src/cblas_srotm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_srotm.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_srotm( const int N, float *X, const int incX, float *Y, - const int incY, const float *P) +void cblas_srotm( f77_int N, float *X, f77_int incX, float *Y, + f77_int incY, const float *P) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; diff --git a/frame/compat/cblas/src/cblas_srotmg.c b/frame/compat/cblas/src/cblas_srotmg.c index 532926bd1..7f26ac659 100644 --- a/frame/compat/cblas/src/cblas_srotmg.c +++ b/frame/compat/cblas/src/cblas_srotmg.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include 
"bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_srotmg.c diff --git a/frame/compat/cblas/src/cblas_ssbmv.c b/frame/compat/cblas/src/cblas_ssbmv.c index eedd8a8d1..ef8a96cfb 100644 --- a/frame/compat/cblas/src/cblas_ssbmv.c +++ b/frame/compat/cblas/src/cblas_ssbmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -14,10 +10,10 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_ssbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const int K, const float alpha, const float *A, - const int lda, const float *X, const int incX, - const float beta, float *Y, const int incY) +void cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, f77_int K, float alpha, const float *A, + f77_int lda, const float *X, f77_int incX, + float beta, float *Y, f77_int incY) { char UL; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_sscal.c b/frame/compat/cblas/src/cblas_sscal.c index a8b1e35ec..55547ae64 100644 --- a/frame/compat/cblas/src/cblas_sscal.c +++ b/frame/compat/cblas/src/cblas_sscal.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_sscal.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_sscal( const int N, const float alpha, float *X, - const int incX) +void cblas_sscal( f77_int N, float alpha, float *X, + f77_int incX) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; diff --git a/frame/compat/cblas/src/cblas_sspmv.c b/frame/compat/cblas/src/cblas_sspmv.c index b53c6da5a..a54f64214 100644 --- a/frame/compat/cblas/src/cblas_sspmv.c +++ b/frame/compat/cblas/src/cblas_sspmv.c 
@@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -14,11 +10,11 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_sspmv(const enum CBLAS_ORDER order, - const enum CBLAS_UPLO Uplo, const int N, - const float alpha, const float *AP, - const float *X, const int incX, const float beta, - float *Y, const int incY) +void cblas_sspmv(enum CBLAS_ORDER order, + enum CBLAS_UPLO Uplo, f77_int N, + float alpha, const float *AP, + const float *X, f77_int incX, float beta, + float *Y, f77_int incY) { char UL; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_sspr.c b/frame/compat/cblas/src/cblas_sspr.c index 015c131d2..693dbba8c 100644 --- a/frame/compat/cblas/src/cblas_sspr.c +++ b/frame/compat/cblas/src/cblas_sspr.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,9 +11,9 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_sspr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const float *X, - const int incX, float *Ap) +void cblas_sspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, const float alpha, const float *X, + f77_int incX, float *Ap) { char UL; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_sspr2.c b/frame/compat/cblas/src/cblas_sspr2.c index 35f095917..a4b612897 100644 --- a/frame/compat/cblas/src/cblas_sspr2.c +++ b/frame/compat/cblas/src/cblas_sspr2.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,9 +11,9 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_sspr2(const enum CBLAS_ORDER order, const 
enum CBLAS_UPLO Uplo, - const int N, const float alpha, const float *X, - const int incX, const float *Y, const int incY, float *A) +void cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, const float alpha, const float *X, + f77_int incX, const float *Y, f77_int incY, float *A) { char UL; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_sswap.c b/frame/compat/cblas/src/cblas_sswap.c index fd751cda2..2c24ad0fa 100644 --- a/frame/compat/cblas/src/cblas_sswap.c +++ b/frame/compat/cblas/src/cblas_sswap.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_sswap.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_sswap( const int N, float *X, const int incX, float *Y, - const int incY) +void cblas_sswap( f77_int N, float *X, f77_int incX, float *Y, + f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; diff --git a/frame/compat/cblas/src/cblas_ssymm.c b/frame/compat/cblas/src/cblas_ssymm.c index b795ebb90..218ddba6a 100644 --- a/frame/compat/cblas/src/cblas_ssymm.c +++ b/frame/compat/cblas/src/cblas_ssymm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const float alpha, const float *A, const int lda, - const float *B, const int ldb, const float beta, - float *C, const int ldc) +void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, f77_int M, f77_int N, + float alpha, const float *A, f77_int lda, + const float *B, f77_int ldb, float beta, + float *C, f77_int 
ldc) { char SD, UL; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_ssymv.c b/frame/compat/cblas/src/cblas_ssymv.c index a85497f96..6027aaddf 100644 --- a/frame/compat/cblas/src/cblas_ssymv.c +++ b/frame/compat/cblas/src/cblas_ssymv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_ssymv(const enum CBLAS_ORDER order, - const enum CBLAS_UPLO Uplo, const int N, - const float alpha, const float *A, const int lda, - const float *X, const int incX, const float beta, - float *Y, const int incY) +void cblas_ssymv(enum CBLAS_ORDER order, + enum CBLAS_UPLO Uplo, f77_int N, + float alpha, const float *A, f77_int lda, + const float *X, f77_int incX, float beta, + float *Y, f77_int incY) { char UL; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_ssyr.c b/frame/compat/cblas/src/cblas_ssyr.c index 1e982ef8d..14ed8fe54 100644 --- a/frame/compat/cblas/src/cblas_ssyr.c +++ b/frame/compat/cblas/src/cblas_ssyr.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -14,9 +10,9 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_ssyr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const float *X, - const int incX, float *A, const int lda) +void cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, const float alpha, const float *X, + f77_int incX, float *A, f77_int lda) { char UL; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_ssyr2.c b/frame/compat/cblas/src/cblas_ssyr2.c index f55136317..48263f2ae 100644 --- a/frame/compat/cblas/src/cblas_ssyr2.c +++ b/frame/compat/cblas/src/cblas_ssyr2.c @@ -1,8 +1,4 @@ 
-#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,10 +11,10 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_ssyr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const float alpha, const float *X, - const int incX, const float *Y, const int incY, float *A, - const int lda) +void cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, const float alpha, const float *X, + f77_int incX, const float *Y, f77_int incY, float *A, + f77_int lda) { char UL; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_ssyr2k.c b/frame/compat/cblas/src/cblas_ssyr2k.c index 5153beae5..50cf8bf1d 100644 --- a/frame/compat/cblas/src/cblas_ssyr2k.c +++ b/frame/compat/cblas/src/cblas_ssyr2k.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const float alpha, const float *A, const int lda, - const float *B, const int ldb, const float beta, - float *C, const int ldc) +void cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + float alpha, const float *A, f77_int lda, + const float *B, f77_int ldb, float beta, + float *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_ssyrk.c b/frame/compat/cblas/src/cblas_ssyrk.c index efe1d3be6..6f486870f 100644 --- a/frame/compat/cblas/src/cblas_ssyrk.c +++ b/frame/compat/cblas/src/cblas_ssyrk.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include 
"bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,10 +11,10 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const float alpha, const float *A, const int lda, - const float beta, float *C, const int ldc) +void cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + float alpha, const float *A, f77_int lda, + float beta, float *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_stbmv.c b/frame/compat/cblas/src/cblas_stbmv.c index f0799ccf3..15daa49da 100644 --- a/frame/compat/cblas/src/cblas_stbmv.c +++ b/frame/compat/cblas/src/cblas_stbmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_stbmv.c @@ -13,10 +9,10 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_stbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const float *A, const int lda, - float *X, const int incX) +void cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, f77_int K, const float *A, f77_int lda, + float *X, f77_int incX) { char TA; char UL; diff --git a/frame/compat/cblas/src/cblas_stbsv.c b/frame/compat/cblas/src/cblas_stbsv.c index ab4f44b53..ec9d9c844 100644 --- a/frame/compat/cblas/src/cblas_stbsv.c +++ b/frame/compat/cblas/src/cblas_stbsv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_stbsv.c @@ -13,10 +9,10 @@ */ 
#include "cblas.h" #include "cblas_f77.h" -void cblas_stbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const float *A, const int lda, - float *X, const int incX) +void cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, f77_int K, const float *A, f77_int lda, + float *X, f77_int incX) { char TA; char UL; diff --git a/frame/compat/cblas/src/cblas_stpmv.c b/frame/compat/cblas/src/cblas_stpmv.c index 605de03de..91b308176 100644 --- a/frame/compat/cblas/src/cblas_stpmv.c +++ b/frame/compat/cblas/src/cblas_stpmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -14,9 +10,9 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_stpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const float *Ap, float *X, const int incX) +void cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const float *Ap, float *X, f77_int incX) { char TA; char UL; diff --git a/frame/compat/cblas/src/cblas_stpsv.c b/frame/compat/cblas/src/cblas_stpsv.c index ba3b599ec..548eaec79 100644 --- a/frame/compat/cblas/src/cblas_stpsv.c +++ b/frame/compat/cblas/src/cblas_stpsv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_stpsv.c @@ -13,9 +9,9 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_stpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, 
const float *Ap, float *X, const int incX) +void cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const float *Ap, float *X, f77_int incX) { char TA; char UL; diff --git a/frame/compat/cblas/src/cblas_strmm.c b/frame/compat/cblas/src/cblas_strmm.c index 810bce9d3..a7737109b 100644 --- a/frame/compat/cblas/src/cblas_strmm.c +++ b/frame/compat/cblas/src/cblas_strmm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const float alpha, const float *A, const int lda, - float *B, const int ldb) +void cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + enum CBLAS_DIAG Diag, f77_int M, f77_int N, + float alpha, const float *A, f77_int lda, + float *B, f77_int ldb) { char UL, TA, SD, DI; #ifdef F77_CHAR diff --git a/frame/compat/cblas/src/cblas_strmv.c b/frame/compat/cblas/src/cblas_strmv.c index 8d216751f..987589eb7 100644 --- a/frame/compat/cblas/src/cblas_strmv.c +++ b/frame/compat/cblas/src/cblas_strmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -14,10 +10,10 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_strmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const float *A, const int lda, - float *X, const int incX) +void cblas_strmv(enum CBLAS_ORDER order, enum 
CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const float *A, f77_int lda, + float *X, f77_int incX) { char TA; diff --git a/frame/compat/cblas/src/cblas_strsm.c b/frame/compat/cblas/src/cblas_strsm.c index 27be540e3..ef78969f0 100644 --- a/frame/compat/cblas/src/cblas_strsm.c +++ b/frame/compat/cblas/src/cblas_strsm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const float alpha, const float *A, const int lda, - float *B, const int ldb) +void cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_DIAG Diag, f77_int M, f77_int N, + float alpha, const float *A, f77_int lda, + float *B, f77_int ldb) { char UL, TA, SD, DI; diff --git a/frame/compat/cblas/src/cblas_strsv.c b/frame/compat/cblas/src/cblas_strsv.c index a497dfc38..a37c797b8 100644 --- a/frame/compat/cblas/src/cblas_strsv.c +++ b/frame/compat/cblas/src/cblas_strsv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_strsv.c @@ -13,10 +9,10 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_strsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const float *A, const int lda, float *X, - const int incX) +void cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const 
float *A, f77_int lda, float *X, + f77_int incX) { char TA; diff --git a/frame/compat/cblas/src/cblas_xerbla.c b/frame/compat/cblas/src/cblas_xerbla.c index d377db30d..5d7908068 100644 --- a/frame/compat/cblas/src/cblas_xerbla.c +++ b/frame/compat/cblas/src/cblas_xerbla.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS #include #include @@ -11,7 +7,7 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_xerbla(int info, const char *rout, const char *form, ...) +void cblas_xerbla(f77_int info, const char *rout, const char *form, ...) { extern int RowMajorStrg; char empty[1] = ""; @@ -69,7 +65,7 @@ void cblas_xerbla(int info, const char *rout, const char *form, ...) vfprintf(stderr, form, argptr); va_end(argptr); if (info && !info) - F77_xerbla(empty, &info); /* Force link of our F77 error handler */ + F77_xerbla(empty, &info, 0); /* Force link of our F77 error handler */ exit(-1); } #endif diff --git a/frame/compat/cblas/src/cblas_zaxpy.c b/frame/compat/cblas/src/cblas_zaxpy.c index 73b1d2e44..dcf2f5c78 100644 --- a/frame/compat/cblas/src/cblas_zaxpy.c +++ b/frame/compat/cblas/src/cblas_zaxpy.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zaxpy.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_zaxpy( const int N, const void *alpha, const void *X, - const int incX, void *Y, const int incY) +void cblas_zaxpy( f77_int N, const void *alpha, const void *X, + f77_int incX, void *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; @@ -24,6 +20,6 @@ void cblas_zaxpy( const int N, const void *alpha, const void *X, #define F77_incX incX #define F77_incY incY #endif - F77_zaxpy( &F77_N, alpha, X, &F77_incX, Y, 
&F77_incY); + F77_zaxpy( &F77_N, (dcomplex*)alpha, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY); } #endif diff --git a/frame/compat/cblas/src/cblas_zcopy.c b/frame/compat/cblas/src/cblas_zcopy.c index 1a8d92fc8..5b0a7dd5d 100644 --- a/frame/compat/cblas/src/cblas_zcopy.c +++ b/frame/compat/cblas/src/cblas_zcopy.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zcopy.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_zcopy( const int N, const void *X, - const int incX, void *Y, const int incY) +void cblas_zcopy( f77_int N, const void *X, + f77_int incX, void *Y, f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; @@ -24,6 +20,6 @@ void cblas_zcopy( const int N, const void *X, #define F77_incX incX #define F77_incY incY #endif - F77_zcopy( &F77_N, X, &F77_incX, Y, &F77_incY); + F77_zcopy( &F77_N, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY); } #endif diff --git a/frame/compat/cblas/src/cblas_zdotc_sub.c b/frame/compat/cblas/src/cblas_zdotc_sub.c index 0dc0d1b2b..2e0808c88 100644 --- a/frame/compat/cblas/src/cblas_zdotc_sub.c +++ b/frame/compat/cblas/src/cblas_zdotc_sub.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zdotc_sub.c @@ -15,8 +11,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_zdotc_sub( const int N, const void *X, const int incX, - const void *Y, const int incY, void *dotc) +void cblas_zdotc_sub( f77_int N, const void *X, f77_int incX, + const void *Y, f77_int incY, void *dotc) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; @@ -25,7 +21,7 @@ void cblas_zdotc_sub( const int N, const void *X, const int incX, #define F77_incX incX #define F77_incY 
incY #endif - F77_zdotc_sub( &F77_N, X, &F77_incX, Y, &F77_incY, dotc); + F77_zdotc_sub( &F77_N, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY, (dcomplex*)dotc); return; } #endif diff --git a/frame/compat/cblas/src/cblas_zdotu_sub.c b/frame/compat/cblas/src/cblas_zdotu_sub.c index b1d95c6da..255ad833f 100644 --- a/frame/compat/cblas/src/cblas_zdotu_sub.c +++ b/frame/compat/cblas/src/cblas_zdotu_sub.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zdotu_sub.c @@ -15,8 +11,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_zdotu_sub( const int N, const void *X, const int incX, - const void *Y, const int incY, void *dotu) +void cblas_zdotu_sub( f77_int N, const void *X, f77_int incX, + const void *Y, f77_int incY, void *dotu) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; @@ -25,7 +21,7 @@ void cblas_zdotu_sub( const int N, const void *X, const int incX, #define F77_incX incX #define F77_incY incY #endif - F77_zdotu_sub( &F77_N, X, &F77_incX, Y, &F77_incY, dotu); + F77_zdotu_sub( &F77_N, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY, (dcomplex*)dotu); return; } #endif diff --git a/frame/compat/cblas/src/cblas_zdscal.c b/frame/compat/cblas/src/cblas_zdscal.c index eeced12da..9666e4509 100644 --- a/frame/compat/cblas/src/cblas_zdscal.c +++ b/frame/compat/cblas/src/cblas_zdscal.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zdscal.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_zdscal( const int N, const double alpha, void *X, - const int incX) +void cblas_zdscal( f77_int N, double alpha, void *X, + f77_int incX) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; @@ -23,6 +19,6 @@ void 
cblas_zdscal( const int N, const double alpha, void *X, #define F77_N N #define F77_incX incX #endif - F77_zdscal( &F77_N, &alpha, X, &F77_incX); + F77_zdscal( &F77_N, &alpha, (dcomplex*)X, &F77_incX); } #endif diff --git a/frame/compat/cblas/src/cblas_zgbmv.c b/frame/compat/cblas/src/cblas_zgbmv.c index 19bdc8dec..f6efa4e7b 100644 --- a/frame/compat/cblas/src/cblas_zgbmv.c +++ b/frame/compat/cblas/src/cblas_zgbmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zgbmv.c @@ -15,12 +11,12 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_zgbmv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const int KL, const int KU, - const void *alpha, const void *A, const int lda, - const void *X, const int incX, const void *beta, - void *Y, const int incY) +void cblas_zgbmv(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, + f77_int KL, f77_int KU, + const void *alpha, const void *A, f77_int lda, + const void *X, f77_int incX, const void *beta, + void *Y, f77_int incY) { char TA; #ifdef F77_CHAR @@ -37,7 +33,7 @@ void cblas_zgbmv(const enum CBLAS_ORDER order, #define F77_lda lda #define F77_KL KL #define F77_KU KU - #define F77_incX incx + #define F77_incX incX #define F77_incY incY #endif int n, i=0; @@ -65,8 +61,8 @@ void cblas_zgbmv(const enum CBLAS_ORDER order, #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif - F77_zgbmv(F77_TA, &F77_M, &F77_N, &F77_KL, &F77_KU, alpha, - A, &F77_lda, X, &F77_incX, beta, Y, &F77_incY); + F77_zgbmv(F77_TA, &F77_M, &F77_N, &F77_KL, &F77_KU, (dcomplex*)alpha, + (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX, (dcomplex*)beta, (dcomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { @@ -109,7 +105,7 @@ void cblas_zgbmv(const enum CBLAS_ORDER order, #ifdef F77_INT F77_incX = 1; #else - incx = 1; 
+ incX = 1; #endif if( incY > 0 ) @@ -146,11 +142,11 @@ void cblas_zgbmv(const enum CBLAS_ORDER order, F77_TA = C2F_CHAR(&TA); #endif if (TransA == CblasConjTrans) - F77_zgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, ALPHA, - A ,&F77_lda, x,&F77_incX, BETA, Y, &F77_incY); + F77_zgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, (dcomplex*)ALPHA, + (dcomplex*)A ,&F77_lda, (dcomplex*)x,&F77_incX, (dcomplex*)BETA, (dcomplex*)Y, &F77_incY); else - F77_zgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, alpha, - A ,&F77_lda, x,&F77_incX, beta, Y, &F77_incY); + F77_zgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, (dcomplex*)alpha, + (dcomplex*)A ,&F77_lda, (dcomplex*)x,&F77_incX, (dcomplex*)beta, (dcomplex*)Y, &F77_incY); if (TransA == CblasConjTrans) { if (x != X) free(x); diff --git a/frame/compat/cblas/src/cblas_zgemm.c b/frame/compat/cblas/src/cblas_zgemm.c index f91ece49f..e50de2205 100644 --- a/frame/compat/cblas/src/cblas_zgemm.c +++ b/frame/compat/cblas/src/cblas_zgemm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_TRANSPOSE TransB, const int M, const int N, - const int K, const void *alpha, const void *A, - const int lda, const void *B, const int ldb, - const void *beta, void *C, const int ldc) +void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, + enum CBLAS_TRANSPOSE TransB, f77_int M, f77_int N, + f77_int K, const void *alpha, const void *A, + f77_int lda, const void *B, f77_int ldb, + const void *beta, void *C, f77_int ldc) { char TA, TB; #ifdef F77_CHAR @@ -75,8 +71,8 @@ void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA F77_TB = C2F_CHAR(&TB); #endif - F77_zgemm(F77_TA, F77_TB, &F77_M, &F77_N, 
&F77_K, alpha, A, - &F77_lda, B, &F77_ldb, beta, C, &F77_ldc); + F77_zgemm(F77_TA, F77_TB, &F77_M, &F77_N, &F77_K, (dcomplex*)alpha, (dcomplex*)A, + &F77_lda, (dcomplex*)B, &F77_ldb, (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; @@ -105,8 +101,8 @@ void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA F77_TB = C2F_CHAR(&TB); #endif - F77_zgemm(F77_TA, F77_TB, &F77_N, &F77_M, &F77_K, alpha, B, - &F77_ldb, A, &F77_lda, beta, C, &F77_ldc); + F77_zgemm(F77_TA, F77_TB, &F77_N, &F77_M, &F77_K, (dcomplex*)alpha, (dcomplex*)B, + &F77_ldb, (dcomplex*)A, &F77_lda, (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_zgemm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; diff --git a/frame/compat/cblas/src/cblas_zgemv.c b/frame/compat/cblas/src/cblas_zgemv.c index 924be72fb..05b0bfe84 100644 --- a/frame/compat/cblas/src/cblas_zgemv.c +++ b/frame/compat/cblas/src/cblas_zgemv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zgemv.c @@ -15,11 +11,11 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_zgemv(const enum CBLAS_ORDER order, - const enum CBLAS_TRANSPOSE TransA, const int M, const int N, - const void *alpha, const void *A, const int lda, - const void *X, const int incX, const void *beta, - void *Y, const int incY) +void cblas_zgemv(enum CBLAS_ORDER order, + enum CBLAS_TRANSPOSE TransA, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + const void *X, f77_int incX, const void *beta, + void *Y, f77_int incY) { char TA; #ifdef F77_CHAR @@ -33,7 +29,7 @@ void cblas_zgemv(const enum CBLAS_ORDER order, #define F77_M M #define F77_N N #define F77_lda lda - #define F77_incX incx + #define F77_incX incX #define F77_incY incY #endif @@ -63,8 +59,8 @@ void cblas_zgemv(const 
enum CBLAS_ORDER order, #ifdef F77_CHAR F77_TA = C2F_CHAR(&TA); #endif - F77_zgemv(F77_TA, &F77_M, &F77_N, alpha, A, &F77_lda, X, &F77_incX, - beta, Y, &F77_incY); + F77_zgemv(F77_TA, &F77_M, &F77_N, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX, + (dcomplex*)beta, (dcomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { @@ -108,7 +104,7 @@ void cblas_zgemv(const enum CBLAS_ORDER order, #ifdef F77_INT F77_incX = 1; #else - incx = 1; + incX = 1; #endif if(incY > 0) @@ -143,11 +139,11 @@ void cblas_zgemv(const enum CBLAS_ORDER order, F77_TA = C2F_CHAR(&TA); #endif if (TransA == CblasConjTrans) - F77_zgemv(F77_TA, &F77_N, &F77_M, ALPHA, A, &F77_lda, x, - &F77_incX, BETA, Y, &F77_incY); + F77_zgemv(F77_TA, &F77_N, &F77_M, (dcomplex*)ALPHA, (dcomplex*)A, &F77_lda, (dcomplex*)x, + &F77_incX, (dcomplex*)BETA, (dcomplex*)Y, &F77_incY); else - F77_zgemv(F77_TA, &F77_N, &F77_M, alpha, A, &F77_lda, x, - &F77_incX, beta, Y, &F77_incY); + F77_zgemv(F77_TA, &F77_N, &F77_M, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)x, + &F77_incX, (dcomplex*)beta, (dcomplex*)Y, &F77_incY); if (TransA == CblasConjTrans) { diff --git a/frame/compat/cblas/src/cblas_zgerc.c b/frame/compat/cblas/src/cblas_zgerc.c index 9e56ef35e..9667958c5 100644 --- a/frame/compat/cblas/src/cblas_zgerc.c +++ b/frame/compat/cblas/src/cblas_zgerc.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zgerc.c @@ -15,9 +11,9 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_zgerc(const enum CBLAS_ORDER order, const int M, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda) +void cblas_zgerc(enum CBLAS_ORDER order, f77_int M, f77_int N, + const void *alpha, const void *X, f77_int incX, + const void *Y, f77_int incY, void *A, f77_int lda) { #ifdef 
F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; @@ -25,7 +21,7 @@ void cblas_zgerc(const enum CBLAS_ORDER order, const int M, const int N, #define F77_M M #define F77_N N #define F77_incX incX - #define F77_incY incy + #define F77_incY incY #define F77_lda lda #endif @@ -39,7 +35,7 @@ void cblas_zgerc(const enum CBLAS_ORDER order, const int M, const int N, CBLAS_CallFromC = 1; if (order == CblasColMajor) { - F77_zgerc( &F77_M, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, A, + F77_zgerc( &F77_M, &F77_N, (dcomplex*)alpha, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY, (dcomplex*)A, &F77_lda); } else if (order == CblasRowMajor) { @@ -73,12 +69,12 @@ void cblas_zgerc(const enum CBLAS_ORDER order, const int M, const int N, #ifdef F77_INT F77_incY = 1; #else - incy = 1; + incY = 1; #endif } else y = (double *) Y; - F77_zgeru( &F77_N, &F77_M, alpha, y, &F77_incY, X, &F77_incX, A, + F77_zgeru( &F77_N, &F77_M, (dcomplex*)alpha, (dcomplex*)y, &F77_incY, (dcomplex*)X, &F77_incX, (dcomplex*)A, &F77_lda); if(Y!=y) free(y); diff --git a/frame/compat/cblas/src/cblas_zgeru.c b/frame/compat/cblas/src/cblas_zgeru.c index 994f5e7df..3538ed044 100644 --- a/frame/compat/cblas/src/cblas_zgeru.c +++ b/frame/compat/cblas/src/cblas_zgeru.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zgeru.c @@ -13,9 +9,9 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_zgeru(const enum CBLAS_ORDER order, const int M, const int N, - const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda) +void cblas_zgeru(enum CBLAS_ORDER order, f77_int M, f77_int N, + const void *alpha, const void *X, f77_int incX, + const void *Y, f77_int incY, void *A, f77_int lda) { #ifdef F77_INT F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY; @@ -34,13 +30,13 @@ 
void cblas_zgeru(const enum CBLAS_ORDER order, const int M, const int N, if (order == CblasColMajor) { - F77_zgeru( &F77_M, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, A, + F77_zgeru( &F77_M, &F77_N, (dcomplex*)alpha, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY, (dcomplex*)A, &F77_lda); } else if (order == CblasRowMajor) { RowMajorStrg = 1; - F77_zgeru( &F77_N, &F77_M, alpha, Y, &F77_incY, X, &F77_incX, A, + F77_zgeru( &F77_N, &F77_M, (dcomplex*)alpha, (dcomplex*)Y, &F77_incY, (dcomplex*)X, &F77_incX, (dcomplex*)A, &F77_lda); } else cblas_xerbla(1, "cblas_zgeru", "Illegal Order setting, %d\n", order); diff --git a/frame/compat/cblas/src/cblas_zhbmv.c b/frame/compat/cblas/src/cblas_zhbmv.c index 1cd5095d3..8bfd139e9 100644 --- a/frame/compat/cblas/src/cblas_zhbmv.c +++ b/frame/compat/cblas/src/cblas_zhbmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zhbmv.c @@ -15,11 +11,11 @@ #include "cblas_f77.h" #include #include -void cblas_zhbmv(const enum CBLAS_ORDER order, - const enum CBLAS_UPLO Uplo,const int N,const int K, - const void *alpha, const void *A, const int lda, - const void *X, const int incX, const void *beta, - void *Y, const int incY) +void cblas_zhbmv(enum CBLAS_ORDER order, + enum CBLAS_UPLO Uplo,f77_int N,f77_int K, + const void *alpha, const void *A, f77_int lda, + const void *X, f77_int incX, const void *beta, + void *Y, f77_int incY) { char UL; #ifdef F77_CHAR @@ -33,7 +29,7 @@ void cblas_zhbmv(const enum CBLAS_ORDER order, #define F77_N N #define F77_K K #define F77_lda lda - #define F77_incX incx + #define F77_incX incX #define F77_incY incY #endif int n, i=0; @@ -60,8 +56,8 @@ void cblas_zhbmv(const enum CBLAS_ORDER order, #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif - F77_zhbmv(F77_UL, &F77_N, &F77_K, alpha, A, &F77_lda, X, - &F77_incX, beta, Y, &F77_incY); + F77_zhbmv(F77_UL, 
&F77_N, &F77_K, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)X, + &F77_incX, (dcomplex*)beta, (dcomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { @@ -102,7 +98,7 @@ void cblas_zhbmv(const enum CBLAS_ORDER order, #ifdef F77_INT F77_incX = 1; #else - incx = 1; + incX = 1; #endif if(incY > 0) @@ -134,8 +130,8 @@ void cblas_zhbmv(const enum CBLAS_ORDER order, #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif - F77_zhbmv(F77_UL, &F77_N, &F77_K, ALPHA, - A ,&F77_lda, x,&F77_incX, BETA, Y, &F77_incY); + F77_zhbmv(F77_UL, &F77_N, &F77_K, (dcomplex*)ALPHA, + (dcomplex*)A ,&F77_lda, (dcomplex*)x,&F77_incX, (dcomplex*)BETA, (dcomplex*)Y, &F77_incY); } else { diff --git a/frame/compat/cblas/src/cblas_zhemm.c b/frame/compat/cblas/src/cblas_zhemm.c index 596e72569..7da2a41e6 100644 --- a/frame/compat/cblas/src/cblas_zhemm.c +++ b/frame/compat/cblas/src/cblas_zhemm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc) +void cblas_zhemm(enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + const void *B, f77_int ldb, const void *beta, + void *C, f77_int ldc) { char SD, UL; #ifdef F77_CHAR @@ -72,8 +68,8 @@ void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, F77_SD = C2F_CHAR(&SD); #endif - F77_zhemm(F77_SD, F77_UL, &F77_M, &F77_N, alpha, A, &F77_lda, - B, &F77_ldb, beta, C, &F77_ldc); + F77_zhemm(F77_SD, F77_UL, &F77_M, &F77_N, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, + (dcomplex*)B, 
&F77_ldb, (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; @@ -102,8 +98,8 @@ void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, F77_SD = C2F_CHAR(&SD); #endif - F77_zhemm(F77_SD, F77_UL, &F77_N, &F77_M, alpha, A, - &F77_lda, B, &F77_ldb, beta, C, &F77_ldc); + F77_zhemm(F77_SD, F77_UL, &F77_N, &F77_M, (dcomplex*)alpha, (dcomplex*)A, + &F77_lda, (dcomplex*)B, &F77_ldb, (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_zhemm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; diff --git a/frame/compat/cblas/src/cblas_zhemv.c b/frame/compat/cblas/src/cblas_zhemv.c index e30303e88..a0b6082ce 100644 --- a/frame/compat/cblas/src/cblas_zhemv.c +++ b/frame/compat/cblas/src/cblas_zhemv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zhemv.c @@ -15,11 +11,11 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_zhemv(const enum CBLAS_ORDER order, - const enum CBLAS_UPLO Uplo, const int N, - const void *alpha, const void *A, const int lda, - const void *X, const int incX, const void *beta, - void *Y, const int incY) +void cblas_zhemv(enum CBLAS_ORDER order, + enum CBLAS_UPLO Uplo, f77_int N, + const void *alpha, const void *A, f77_int lda, + const void *X, f77_int incX, const void *beta, + void *Y, f77_int incY) { char UL; #ifdef F77_CHAR @@ -32,7 +28,7 @@ void cblas_zhemv(const enum CBLAS_ORDER order, #else #define F77_N N #define F77_lda lda - #define F77_incX incx + #define F77_incX incX #define F77_incY incY #endif int n, i=0; @@ -60,8 +56,8 @@ void cblas_zhemv(const enum CBLAS_ORDER order, #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif - F77_zhemv(F77_UL, &F77_N, alpha, A, &F77_lda, X, &F77_incX, - beta, Y, &F77_incY); + F77_zhemv(F77_UL, &F77_N, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, 
(dcomplex*)X, &F77_incX, + (dcomplex*)beta, (dcomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { @@ -102,7 +98,7 @@ void cblas_zhemv(const enum CBLAS_ORDER order, #ifdef F77_INT F77_incX = 1; #else - incx = 1; + incX = 1; #endif if(incY > 0) @@ -135,8 +131,8 @@ void cblas_zhemv(const enum CBLAS_ORDER order, #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif - F77_zhemv(F77_UL, &F77_N, ALPHA, A, &F77_lda, x, &F77_incX, - BETA, Y, &F77_incY); + F77_zhemv(F77_UL, &F77_N, (dcomplex*)ALPHA, (dcomplex*)A, &F77_lda, (dcomplex*)x, &F77_incX, + (dcomplex*)BETA, (dcomplex*)Y, &F77_incY); } else { diff --git a/frame/compat/cblas/src/cblas_zher.c b/frame/compat/cblas/src/cblas_zher.c index af2e2ea6f..9e513d43b 100644 --- a/frame/compat/cblas/src/cblas_zher.c +++ b/frame/compat/cblas/src/cblas_zher.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zher.c @@ -15,9 +11,9 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_zher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const void *X, const int incX - ,void *A, const int lda) +void cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, double alpha, const void *X, f77_int incX + ,void *A, f77_int lda) { char UL; #ifdef F77_CHAR @@ -31,7 +27,7 @@ void cblas_zher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, #else #define F77_N N #define F77_lda lda - #define F77_incX incx + #define F77_incX incX #endif int n, i, tincx; double *x=(double *)X, *xx=(double *)X, *tx, *st; @@ -56,7 +52,7 @@ void cblas_zher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_UL = C2F_CHAR(&UL); #endif - F77_zher(F77_UL, &F77_N, &alpha, X, &F77_incX, A, &F77_lda); + F77_zher(F77_UL, &F77_N, &alpha, (dcomplex*)X, &F77_incX, (dcomplex*)A, &F77_lda); } else if (order == CblasRowMajor) { @@ 
-101,11 +97,11 @@ void cblas_zher(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, #ifdef F77_INT F77_incX = 1; #else - incx = 1; + incX = 1; #endif } else x = (double *) X; - F77_zher(F77_UL, &F77_N, &alpha, x, &F77_incX, A, &F77_lda); + F77_zher(F77_UL, &F77_N, &alpha, (dcomplex*)x, &F77_incX, (dcomplex*)A, &F77_lda); } else cblas_xerbla(1, "cblas_zher", "Illegal Order setting, %d\n", order); if(X!=x) free(x); diff --git a/frame/compat/cblas/src/cblas_zher2.c b/frame/compat/cblas/src/cblas_zher2.c index 8a9bb2e9e..5d49302f9 100644 --- a/frame/compat/cblas/src/cblas_zher2.c +++ b/frame/compat/cblas/src/cblas_zher2.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zher2.c @@ -15,9 +11,9 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_zher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const void *alpha, const void *X, const int incX, - const void *Y, const int incY, void *A, const int lda) +void cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, const void *alpha, const void *X, f77_int incX, + const void *Y, f77_int incY, void *A, f77_int lda) { char UL; #ifdef F77_CHAR @@ -31,8 +27,8 @@ void cblas_zher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, #else #define F77_N N #define F77_lda lda - #define F77_incX incx - #define F77_incY incy + #define F77_incX incX + #define F77_incY incY #endif int n, i, j, tincx, tincy; double *x=(double *)X, *xx=(double *)X, *y=(double *)Y, @@ -58,8 +54,8 @@ void cblas_zher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_UL = C2F_CHAR(&UL); #endif - F77_zher2(F77_UL, &F77_N, alpha, X, &F77_incX, - Y, &F77_incY, A, &F77_lda); + F77_zher2(F77_UL, &F77_N, (dcomplex*)alpha, (dcomplex*)X, &F77_incX, + (dcomplex*)Y, &F77_incY, (dcomplex*)A, &F77_lda); } else if (order == 
CblasRowMajor) { @@ -130,16 +126,16 @@ void cblas_zher2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_incX = 1; F77_incY = 1; #else - incx = 1; - incy = 1; + incX = 1; + incY = 1; #endif } else { x = (double *) X; y = (double *) Y; } - F77_zher2(F77_UL, &F77_N, alpha, y, &F77_incY, x, - &F77_incX, A, &F77_lda); + F77_zher2(F77_UL, &F77_N, (dcomplex*)alpha, (dcomplex*)y, &F77_incY, (dcomplex*)x, + &F77_incX, (dcomplex*)A, &F77_lda); } else { diff --git a/frame/compat/cblas/src/cblas_zher2k.c b/frame/compat/cblas/src/cblas_zher2k.c index d0b0fd130..424f650d0 100644 --- a/frame/compat/cblas/src/cblas_zher2k.c +++ b/frame/compat/cblas/src/cblas_zher2k.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const double beta, - void *C, const int ldc) +void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + const void *alpha, const void *A, f77_int lda, + const void *B, f77_int ldb, double beta, + void *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR @@ -77,7 +73,7 @@ void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, F77_TR = C2F_CHAR(&TR); #endif - F77_zher2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc); + F77_zher2k(F77_UL, F77_TR, &F77_N, &F77_K, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb, &beta, (dcomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; @@ -108,7 +104,7 @@ void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, ALPHA[0]= 
*alp; ALPHA[1]= -alp[1]; - F77_zher2k(F77_UL,F77_TR, &F77_N, &F77_K, ALPHA, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc); + F77_zher2k(F77_UL,F77_TR, &F77_N, &F77_K, (dcomplex*)ALPHA, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb, &beta, (dcomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_zher2k", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; RowMajorStrg = 0; diff --git a/frame/compat/cblas/src/cblas_zherk.c b/frame/compat/cblas/src/cblas_zherk.c index 630376414..5ddb45184 100644 --- a/frame/compat/cblas/src/cblas_zherk.c +++ b/frame/compat/cblas/src/cblas_zherk.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,10 +11,10 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const double alpha, const void *A, const int lda, - const double beta, void *C, const int ldc) +void cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + double alpha, const void *A, f77_int lda, + double beta, void *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR @@ -71,8 +67,8 @@ void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, F77_TR = C2F_CHAR(&TR); #endif - F77_zherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda, - &beta, C, &F77_ldc); + F77_zherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, (dcomplex*)A, &F77_lda, + &beta, (dcomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; @@ -101,8 +97,8 @@ void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, F77_SD = C2F_CHAR(&SD); #endif - F77_zherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda, - &beta, C, &F77_ldc); + F77_zherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, (dcomplex*)A, &F77_lda, + &beta, 
(dcomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_zherk", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; diff --git a/frame/compat/cblas/src/cblas_zhpmv.c b/frame/compat/cblas/src/cblas_zhpmv.c index 17dc943fd..912300d44 100644 --- a/frame/compat/cblas/src/cblas_zhpmv.c +++ b/frame/compat/cblas/src/cblas_zhpmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zhpmv.c @@ -15,11 +11,11 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_zhpmv(const enum CBLAS_ORDER order, - const enum CBLAS_UPLO Uplo,const int N, +void cblas_zhpmv(enum CBLAS_ORDER order, + enum CBLAS_UPLO Uplo,f77_int N, const void *alpha, const void *AP, - const void *X, const int incX, const void *beta, - void *Y, const int incY) + const void *X, f77_int incX, const void *beta, + void *Y, f77_int incY) { char UL; #ifdef F77_CHAR @@ -31,7 +27,7 @@ void cblas_zhpmv(const enum CBLAS_ORDER order, F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N - #define F77_incX incx + #define F77_incX incX #define F77_incY incY #endif int n, i=0; @@ -58,8 +54,8 @@ void cblas_zhpmv(const enum CBLAS_ORDER order, #ifdef F77_CHAR F77_UL = C2F_CHAR(&UL); #endif - F77_zhpmv(F77_UL, &F77_N, alpha, AP, X, - &F77_incX, beta, Y, &F77_incY); + F77_zhpmv(F77_UL, &F77_N, (dcomplex*)alpha, (dcomplex*)AP, (dcomplex*)X, + &F77_incX, (dcomplex*)beta, (dcomplex*)Y, &F77_incY); } else if (order == CblasRowMajor) { @@ -100,7 +96,7 @@ void cblas_zhpmv(const enum CBLAS_ORDER order, #ifdef F77_INT F77_incX = 1; #else - incx = 1; + incX = 1; #endif if(incY > 0) @@ -134,8 +130,8 @@ void cblas_zhpmv(const enum CBLAS_ORDER order, F77_UL = C2F_CHAR(&UL); #endif - F77_zhpmv(F77_UL, &F77_N, ALPHA, - AP, x, &F77_incX, BETA, Y, &F77_incY); + F77_zhpmv(F77_UL, &F77_N, (dcomplex*)ALPHA, + (dcomplex*)AP, (dcomplex*)x, &F77_incX, 
(dcomplex*)BETA, (dcomplex*)Y, &F77_incY); } else { diff --git a/frame/compat/cblas/src/cblas_zhpr.c b/frame/compat/cblas/src/cblas_zhpr.c index ae0a6cc4c..80d238170 100644 --- a/frame/compat/cblas/src/cblas_zhpr.c +++ b/frame/compat/cblas/src/cblas_zhpr.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zhpr.c @@ -15,9 +11,9 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_zhpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N, const double alpha, const void *X, - const int incX, void *A) +void cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N, double alpha, const void *X, + f77_int incX, void *A) { char UL; #ifdef F77_CHAR @@ -30,7 +26,7 @@ void cblas_zhpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_INT F77_N=N, F77_incX=incX; #else #define F77_N N - #define F77_incX incx + #define F77_incX incX #endif int n, i, tincx; double *x=(double *)X, *xx=(double *)X, *tx, *st; @@ -55,7 +51,7 @@ void cblas_zhpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_UL = C2F_CHAR(&UL); #endif - F77_zhpr(F77_UL, &F77_N, &alpha, X, &F77_incX, A); + F77_zhpr(F77_UL, &F77_N, &alpha, (dcomplex*)X, &F77_incX, (dcomplex*)A); } else if (order == CblasRowMajor) { @@ -99,12 +95,12 @@ void cblas_zhpr(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, #ifdef F77_INT F77_incX = 1; #else - incx = 1; + incX = 1; #endif } else x = (double *) X; - F77_zhpr(F77_UL, &F77_N, &alpha, x, &F77_incX, A); + F77_zhpr(F77_UL, &F77_N, &alpha, (dcomplex*)x, &F77_incX, (dcomplex*)A); } else { diff --git a/frame/compat/cblas/src/cblas_zhpr2.c b/frame/compat/cblas/src/cblas_zhpr2.c index 124a43cfc..2aa46062a 100644 --- a/frame/compat/cblas/src/cblas_zhpr2.c +++ b/frame/compat/cblas/src/cblas_zhpr2.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include 
"bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zhpr2.c @@ -15,9 +11,9 @@ #include #include "cblas.h" #include "cblas_f77.h" -void cblas_zhpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const int N,const void *alpha, const void *X, - const int incX,const void *Y, const int incY, void *Ap) +void cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + f77_int N,const void *alpha, const void *X, + f77_int incX,const void *Y, f77_int incY, void *Ap) { char UL; @@ -31,8 +27,8 @@ void cblas_zhpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; #else #define F77_N N - #define F77_incX incx - #define F77_incY incy + #define F77_incX incX + #define F77_incY incY #endif int n, i, j; double *x=(double *)X, *xx=(double *)X, *y=(double *)Y, @@ -58,7 +54,7 @@ void cblas_zhpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_UL = C2F_CHAR(&UL); #endif - F77_zhpr2(F77_UL, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, Ap); + F77_zhpr2(F77_UL, &F77_N, (dcomplex*)alpha, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY, (dcomplex*)Ap); } else if (order == CblasRowMajor) { @@ -122,14 +118,14 @@ void cblas_zhpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, #else if(incX > 0 ) - incx = 1; + incX = 1; else - incx = -1; + incX = -1; if(incY > 0 ) - incy = 1; + incY = 1; else - incy = -1; + incY = -1; #endif } else @@ -137,7 +133,7 @@ void cblas_zhpr2(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, x = (double *) X; y = (void *) Y; } - F77_zhpr2(F77_UL, &F77_N, alpha, y, &F77_incY, x, &F77_incX, Ap); + F77_zhpr2(F77_UL, &F77_N, (dcomplex*)alpha, (dcomplex*)y, &F77_incY, (dcomplex*)x, &F77_incX, (dcomplex*)Ap); } else { diff --git a/frame/compat/cblas/src/cblas_zscal.c b/frame/compat/cblas/src/cblas_zscal.c index 3a45cfece..a66ea6f9d 100644 --- 
a/frame/compat/cblas/src/cblas_zscal.c +++ b/frame/compat/cblas/src/cblas_zscal.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zscal.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_zscal( const int N, const void *alpha, void *X, - const int incX) +void cblas_zscal( f77_int N, const void *alpha, void *X, + f77_int incX) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; @@ -23,6 +19,6 @@ void cblas_zscal( const int N, const void *alpha, void *X, #define F77_N N #define F77_incX incX #endif - F77_zscal( &F77_N, alpha, X, &F77_incX); + F77_zscal( &F77_N, (dcomplex*)alpha, (dcomplex*)X, &F77_incX); } #endif diff --git a/frame/compat/cblas/src/cblas_zswap.c b/frame/compat/cblas/src/cblas_zswap.c index 4b30c6d6c..6b2eb1a20 100644 --- a/frame/compat/cblas/src/cblas_zswap.c +++ b/frame/compat/cblas/src/cblas_zswap.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_zswap.c @@ -14,8 +10,8 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_zswap( const int N, void *X, const int incX, void *Y, - const int incY) +void cblas_zswap( f77_int N, void *X, f77_int incX, void *Y, + f77_int incY) { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; @@ -24,6 +20,6 @@ void cblas_zswap( const int N, void *X, const int incX, void *Y, #define F77_incX incX #define F77_incY incY #endif - F77_zswap( &F77_N, X, &F77_incX, Y, &F77_incY); + F77_zswap( &F77_N, (dcomplex*)X, &F77_incX, (dcomplex*)Y, &F77_incY); } #endif diff --git a/frame/compat/cblas/src/cblas_zsymm.c b/frame/compat/cblas/src/cblas_zsymm.c index 12dbe513a..9303c5921 100644 --- a/frame/compat/cblas/src/cblas_zsymm.c +++ b/frame/compat/cblas/src/cblas_zsymm.c @@ -1,8 
+1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const int M, const int N, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc) +void cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + const void *B, f77_int ldb, const void *beta, + void *C, f77_int ldc) { char SD, UL; #ifdef F77_CHAR @@ -72,8 +68,8 @@ void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, F77_SD = C2F_CHAR(&SD); #endif - F77_zsymm(F77_SD, F77_UL, &F77_M, &F77_N, alpha, A, &F77_lda, - B, &F77_ldb, beta, C, &F77_ldc); + F77_zsymm(F77_SD, F77_UL, &F77_M, &F77_N, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, + (dcomplex*)B, &F77_ldb, (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; @@ -102,8 +98,8 @@ void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, F77_SD = C2F_CHAR(&SD); #endif - F77_zsymm(F77_SD, F77_UL, &F77_N, &F77_M, alpha, A, &F77_lda, - B, &F77_ldb, beta, C, &F77_ldc); + F77_zsymm(F77_SD, F77_UL, &F77_N, &F77_M, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, + (dcomplex*)B, &F77_ldb, (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_zsymm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; diff --git a/frame/compat/cblas/src/cblas_zsyr2k.c b/frame/compat/cblas/src/cblas_zsyr2k.c index 02c402d46..c275cc693 100644 --- a/frame/compat/cblas/src/cblas_zsyr2k.c +++ b/frame/compat/cblas/src/cblas_zsyr2k.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include 
"bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *B, const int ldb, const void *beta, - void *C, const int ldc) +void cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + const void *alpha, const void *A, f77_int lda, + const void *B, f77_int ldb, const void *beta, + void *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR @@ -75,8 +71,8 @@ void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, F77_TR = C2F_CHAR(&TR); #endif - F77_zsyr2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, - B, &F77_ldb, beta, C, &F77_ldc); + F77_zsyr2k(F77_UL, F77_TR, &F77_N, &F77_K, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, + (dcomplex*)B, &F77_ldb, (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; @@ -105,7 +101,7 @@ void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, F77_TR = C2F_CHAR(&TR); #endif - F77_zsyr2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, B, &F77_ldb, beta, C, &F77_ldc); + F77_zsyr2k(F77_UL, F77_TR, &F77_N, &F77_K, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb, (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_zsyr2k", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; diff --git a/frame/compat/cblas/src/cblas_zsyrk.c b/frame/compat/cblas/src/cblas_zsyrk.c index 320a35cde..ce2c0e1c8 100644 --- a/frame/compat/cblas/src/cblas_zsyrk.c +++ b/frame/compat/cblas/src/cblas_zsyrk.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include 
"blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,10 +11,10 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE Trans, const int N, const int K, - const void *alpha, const void *A, const int lda, - const void *beta, void *C, const int ldc) +void cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE Trans, f77_int N, f77_int K, + const void *alpha, const void *A, f77_int lda, + const void *beta, void *C, f77_int ldc) { char UL, TR; #ifdef F77_CHAR @@ -73,8 +69,8 @@ void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, F77_TR = C2F_CHAR(&TR); #endif - F77_zsyrk(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, - beta, C, &F77_ldc); + F77_zsyrk(F77_UL, F77_TR, &F77_N, &F77_K, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, + (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; @@ -103,8 +99,8 @@ void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, F77_TR = C2F_CHAR(&TR); #endif - F77_zsyrk(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, - beta, C, &F77_ldc); + F77_zsyrk(F77_UL, F77_TR, &F77_N, &F77_K, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, + (dcomplex*)beta, (dcomplex*)C, &F77_ldc); } else cblas_xerbla(1, "cblas_zsyrk", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; diff --git a/frame/compat/cblas/src/cblas_ztbmv.c b/frame/compat/cblas/src/cblas_ztbmv.c index 3bb6fe7ad..3c7f58b96 100644 --- a/frame/compat/cblas/src/cblas_ztbmv.c +++ b/frame/compat/cblas/src/cblas_ztbmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ztbmv.c @@ -13,10 +9,10 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_ztbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum 
CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const void *A, const int lda, - void *X, const int incX) +void cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, f77_int K, const void *A, f77_int lda, + void *X, f77_int incX) { char TA; char UL; @@ -78,7 +74,7 @@ void cblas_ztbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif - F77_ztbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, + F77_ztbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX); } else if (order == CblasRowMajor) @@ -141,7 +137,7 @@ void cblas_ztbmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_DI = C2F_CHAR(&DI); #endif - F77_ztbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, + F77_ztbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX); if (TransA == CblasConjTrans) diff --git a/frame/compat/cblas/src/cblas_ztbsv.c b/frame/compat/cblas/src/cblas_ztbsv.c index f08faf044..56d936e1b 100644 --- a/frame/compat/cblas/src/cblas_ztbsv.c +++ b/frame/compat/cblas/src/cblas_ztbsv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ztbsv.c @@ -13,10 +9,10 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_ztbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const int K, const void *A, const int lda, - void *X, const int incX) +void cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, f77_int K, const void *A, f77_int lda, + void *X, f77_int incX) { char TA; char UL; @@ -78,7 +74,7 @@ void 
cblas_ztbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif - F77_ztbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, + F77_ztbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX); } else if (order == CblasRowMajor) @@ -145,7 +141,7 @@ void cblas_ztbsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_DI = C2F_CHAR(&DI); #endif - F77_ztbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X, + F77_ztbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX); if (TransA == CblasConjTrans) diff --git a/frame/compat/cblas/src/cblas_ztpmv.c b/frame/compat/cblas/src/cblas_ztpmv.c index 2a6dfd97a..a8717ccc5 100644 --- a/frame/compat/cblas/src/cblas_ztpmv.c +++ b/frame/compat/cblas/src/cblas_ztpmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ztpmv.c @@ -13,9 +9,9 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_ztpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *Ap, void *X, const int incX) +void cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const void *Ap, void *X, f77_int incX) { char TA; char UL; @@ -75,7 +71,7 @@ void cblas_ztpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif - F77_ztpmv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X, &F77_incX); + F77_ztpmv( F77_UL, F77_TA, F77_DI, &F77_N, (dcomplex*)Ap, (dcomplex*)X, &F77_incX); } else if (order == CblasRowMajor) { @@ -137,7 +133,7 @@ void cblas_ztpmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_DI = C2F_CHAR(&DI); #endif - 
F77_ztpmv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X,&F77_incX); + F77_ztpmv( F77_UL, F77_TA, F77_DI, &F77_N, (dcomplex*)Ap, (dcomplex*)X,&F77_incX); if (TransA == CblasConjTrans) { if (N > 0) diff --git a/frame/compat/cblas/src/cblas_ztpsv.c b/frame/compat/cblas/src/cblas_ztpsv.c index 0ccc60faa..4aa3706cc 100644 --- a/frame/compat/cblas/src/cblas_ztpsv.c +++ b/frame/compat/cblas/src/cblas_ztpsv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ztpsv.c @@ -13,9 +9,9 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_ztpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *Ap, void *X, const int incX) +void cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const void *Ap, void *X, f77_int incX) { char TA; char UL; @@ -75,7 +71,7 @@ void cblas_ztpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif - F77_ztpsv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X, &F77_incX); + F77_ztpsv( F77_UL, F77_TA, F77_DI, &F77_N, (dcomplex*)Ap, (dcomplex*)X, &F77_incX); } else if (order == CblasRowMajor) { @@ -141,7 +137,7 @@ void cblas_ztpsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_DI = C2F_CHAR(&DI); #endif - F77_ztpsv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X,&F77_incX); + F77_ztpsv( F77_UL, F77_TA, F77_DI, &F77_N, (dcomplex*)Ap, (dcomplex*)X,&F77_incX); if (TransA == CblasConjTrans) { diff --git a/frame/compat/cblas/src/cblas_ztrmm.c b/frame/compat/cblas/src/cblas_ztrmm.c index df5905eda..6fa8a2ca9 100644 --- a/frame/compat/cblas/src/cblas_ztrmm.c +++ b/frame/compat/cblas/src/cblas_ztrmm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include 
"bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const void *alpha, const void *A, const int lda, - void *B, const int ldb) +void cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + enum CBLAS_DIAG Diag, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + void *B, f77_int ldb) { char UL, TA, SD, DI; #ifdef F77_CHAR @@ -94,7 +90,7 @@ void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, F77_DI = C2F_CHAR(&DI); #endif - F77_ztrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, alpha, A, &F77_lda, B, &F77_ldb); + F77_ztrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; @@ -146,7 +142,7 @@ void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, F77_DI = C2F_CHAR(&DI); #endif - F77_ztrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, alpha, A, &F77_lda, B, &F77_ldb); + F77_ztrmm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, (dcomplex*)alpha, (dcomplex*)A, &F77_lda, (dcomplex*)B, &F77_ldb); } else cblas_xerbla(1, "cblas_ztrmm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; diff --git a/frame/compat/cblas/src/cblas_ztrmv.c b/frame/compat/cblas/src/cblas_ztrmv.c index 02c0c8685..590105d62 100644 --- a/frame/compat/cblas/src/cblas_ztrmv.c +++ b/frame/compat/cblas/src/cblas_ztrmv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * 
cblas_ztrmv.c @@ -13,10 +9,10 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_ztrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *A, const int lda, - void *X, const int incX) +void cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const void *A, f77_int lda, + void *X, f77_int incX) { char TA; @@ -78,7 +74,7 @@ void cblas_ztrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif - F77_ztrmv( F77_UL, F77_TA, F77_DI, &F77_N, A, &F77_lda, X, + F77_ztrmv( F77_UL, F77_TA, F77_DI, &F77_N, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX); } else if (order == CblasRowMajor) @@ -140,7 +136,7 @@ void cblas_ztrmv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif - F77_ztrmv( F77_UL, F77_TA, F77_DI, &F77_N, A, &F77_lda, X, + F77_ztrmv( F77_UL, F77_TA, F77_DI, &F77_N, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX); if (TransA == CblasConjTrans) { diff --git a/frame/compat/cblas/src/cblas_ztrsm.c b/frame/compat/cblas/src/cblas_ztrsm.c index a0c6da8a1..a68c70e1c 100644 --- a/frame/compat/cblas/src/cblas_ztrsm.c +++ b/frame/compat/cblas/src/cblas_ztrsm.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * @@ -15,11 +11,11 @@ #include "cblas.h" #include "cblas_f77.h" -void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, - const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, - const enum CBLAS_DIAG Diag, const int M, const int N, - const void *alpha, const void *A, const int lda, - void *B, const int ldb) +void cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, + enum CBLAS_UPLO Uplo, 
enum CBLAS_TRANSPOSE TransA, + enum CBLAS_DIAG Diag, f77_int M, f77_int N, + const void *alpha, const void *A, f77_int lda, + void *B, f77_int ldb) { char UL, TA, SD, DI; #ifdef F77_CHAR @@ -96,8 +92,8 @@ void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, F77_DI = C2F_CHAR(&DI); #endif - F77_ztrsm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, alpha, A, - &F77_lda, B, &F77_ldb); + F77_ztrsm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_M, &F77_N, (dcomplex*)alpha, (dcomplex*)A, + &F77_lda, (dcomplex*)B, &F77_ldb); } else if (Order == CblasRowMajor) { RowMajorStrg = 1; @@ -151,8 +147,8 @@ void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, #endif - F77_ztrsm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, alpha, A, - &F77_lda, B, &F77_ldb); + F77_ztrsm(F77_SD, F77_UL, F77_TA, F77_DI, &F77_N, &F77_M, (dcomplex*)alpha, (dcomplex*)A, + &F77_lda, (dcomplex*)B, &F77_ldb); } else cblas_xerbla(1, "cblas_ztrsm", "Illegal Order setting, %d\n", Order); CBLAS_CallFromC = 0; diff --git a/frame/compat/cblas/src/cblas_ztrsv.c b/frame/compat/cblas/src/cblas_ztrsv.c index 63afc7ea0..1a541f7b1 100644 --- a/frame/compat/cblas/src/cblas_ztrsv.c +++ b/frame/compat/cblas/src/cblas_ztrsv.c @@ -1,8 +1,4 @@ -#include "bli_config.h" -#include "bli_config_macro_defs.h" -#include "bli_system.h" -#include "bli_type_defs.h" -#include "bli_cblas.h" +#include "blis.h" #ifdef BLIS_ENABLE_CBLAS /* * cblas_ztrsv.c @@ -13,10 +9,10 @@ */ #include "cblas.h" #include "cblas_f77.h" -void cblas_ztrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, - const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, - const int N, const void *A, const int lda, void *X, - const int incX) +void cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, + enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, + f77_int N, const void *A, f77_int lda, void *X, + f77_int incX) { char TA; char UL; @@ -77,7 +73,7 @@ void cblas_ztrsv(const enum CBLAS_ORDER order, const enum 
CBLAS_UPLO Uplo, F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif - F77_ztrsv( F77_UL, F77_TA, F77_DI, &F77_N, A, &F77_lda, X, + F77_ztrsv( F77_UL, F77_TA, F77_DI, &F77_N, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX); } else if (order == CblasRowMajor) @@ -140,7 +136,7 @@ void cblas_ztrsv(const enum CBLAS_ORDER order, const enum CBLAS_UPLO Uplo, F77_TA = C2F_CHAR(&TA); F77_DI = C2F_CHAR(&DI); #endif - F77_ztrsv( F77_UL, F77_TA, F77_DI, &F77_N, A, &F77_lda, X, + F77_ztrsv( F77_UL, F77_TA, F77_DI, &F77_N, (dcomplex*)A, &F77_lda, (dcomplex*)X, &F77_incX); if (TransA == CblasConjTrans) { diff --git a/frame/compat/check/bla_gemm_check.c b/frame/compat/check/bla_gemm_check.c index aa454e9e5..63ed6d09b 100644 --- a/frame/compat/check/bla_gemm_check.c +++ b/frame/compat/check/bla_gemm_check.c @@ -38,16 +38,16 @@ void bla_gemm_check ( - char* dt_str, - char* op_str, - f77_char* transa, - f77_char* transb, - f77_int* m, - f77_int* n, - f77_int* k, - f77_int* lda, - f77_int* ldb, - f77_int* ldc + const char* dt_str, + const char* op_str, + const f77_char* transa, + const f77_char* transb, + const f77_int* m, + const f77_int* n, + const f77_int* k, + const f77_int* lda, + const f77_int* ldb, + const f77_int* ldc ) { f77_int info = 0; diff --git a/frame/compat/check/bla_gemm_check.h b/frame/compat/check/bla_gemm_check.h index 338bc36a7..08daf5848 100644 --- a/frame/compat/check/bla_gemm_check.h +++ b/frame/compat/check/bla_gemm_check.h @@ -36,16 +36,16 @@ void bla_gemm_check ( - char* dt_str, - char* op_str, - f77_char* transa, - f77_char* transb, - f77_int* m, - f77_int* n, - f77_int* k, - f77_int* lda, - f77_int* ldb, - f77_int* ldc + const char* dt_str, + const char* op_str, + const f77_char* transa, + const f77_char* transb, + const f77_int* m, + const f77_int* n, + const f77_int* k, + const f77_int* lda, + const f77_int* ldb, + const f77_int* ldc ); #endif diff --git a/frame/compat/check/bla_gemv_check.c b/frame/compat/check/bla_gemv_check.c index 
bd135ac52..a731c6f62 100644 --- a/frame/compat/check/bla_gemv_check.c +++ b/frame/compat/check/bla_gemv_check.c @@ -38,14 +38,14 @@ void bla_gemv_check ( - char* dt_str, - char* op_str, - f77_char* transa, - f77_int* m, - f77_int* n, - f77_int* lda, - f77_int* incx, - f77_int* incy + const char* dt_str, + const char* op_str, + const f77_char* transa, + const f77_int* m, + const f77_int* n, + const f77_int* lda, + const f77_int* incx, + const f77_int* incy ) { f77_int info = 0; diff --git a/frame/compat/check/bla_gemv_check.h b/frame/compat/check/bla_gemv_check.h index 452d900cc..1fae167d3 100644 --- a/frame/compat/check/bla_gemv_check.h +++ b/frame/compat/check/bla_gemv_check.h @@ -36,14 +36,14 @@ void bla_gemv_check ( - char* dt_str, - char* op_str, - f77_char* transa, - f77_int* m, - f77_int* n, - f77_int* lda, - f77_int* incx, - f77_int* incy + const char* dt_str, + const char* op_str, + const f77_char* transa, + const f77_int* m, + const f77_int* n, + const f77_int* lda, + const f77_int* incx, + const f77_int* incy ); #endif diff --git a/frame/compat/check/bla_ger_check.c b/frame/compat/check/bla_ger_check.c index 7f8190c80..54656b873 100644 --- a/frame/compat/check/bla_ger_check.c +++ b/frame/compat/check/bla_ger_check.c @@ -38,13 +38,13 @@ void bla_ger_check ( - char* dt_str, - char* op_str, - f77_int* m, - f77_int* n, - f77_int* incx, - f77_int* incy, - f77_int* lda + const char* dt_str, + const char* op_str, + const f77_int* m, + const f77_int* n, + const f77_int* incx, + const f77_int* incy, + const f77_int* lda ) { f77_int info = 0; diff --git a/frame/compat/check/bla_ger_check.h b/frame/compat/check/bla_ger_check.h index a2733b570..ae4b5f295 100644 --- a/frame/compat/check/bla_ger_check.h +++ b/frame/compat/check/bla_ger_check.h @@ -36,13 +36,13 @@ void bla_ger_check ( - char* dt_str, - char* op_str, - f77_int* m, - f77_int* n, - f77_int* lda, - f77_int* incx, - f77_int* incy + const char* dt_str, + const char* op_str, + const f77_int* m, + const 
f77_int* n, + const f77_int* lda, + const f77_int* incx, + const f77_int* incy ); #endif diff --git a/frame/compat/check/bla_hemm_check.c b/frame/compat/check/bla_hemm_check.c index 9dd8b6500..6f8ffa07b 100644 --- a/frame/compat/check/bla_hemm_check.c +++ b/frame/compat/check/bla_hemm_check.c @@ -38,15 +38,15 @@ void bla_hemm_check ( - char* dt_str, - char* op_str, - f77_char* sidea, - f77_char* uploa, - f77_int* m, - f77_int* n, - f77_int* lda, - f77_int* ldb, - f77_int* ldc + const char* dt_str, + const char* op_str, + const f77_char* sidea, + const f77_char* uploa, + const f77_int* m, + const f77_int* n, + const f77_int* lda, + const f77_int* ldb, + const f77_int* ldc ) { f77_int info = 0; diff --git a/frame/compat/check/bla_hemm_check.h b/frame/compat/check/bla_hemm_check.h index 0a5323e4c..b99c9641b 100644 --- a/frame/compat/check/bla_hemm_check.h +++ b/frame/compat/check/bla_hemm_check.h @@ -36,15 +36,15 @@ void bla_hemm_check ( - char* dt_str, - char* op_str, - f77_char* sidea, - f77_char* uploa, - f77_int* m, - f77_int* n, - f77_int* lda, - f77_int* ldb, - f77_int* ldc + const char* dt_str, + const char* op_str, + const f77_char* sidea, + const f77_char* uploa, + const f77_int* m, + const f77_int* n, + const f77_int* lda, + const f77_int* ldb, + const f77_int* ldc ); #endif diff --git a/frame/compat/check/bla_hemv_check.c b/frame/compat/check/bla_hemv_check.c index ad6209890..c671f6f4d 100644 --- a/frame/compat/check/bla_hemv_check.c +++ b/frame/compat/check/bla_hemv_check.c @@ -38,13 +38,13 @@ void bla_hemv_check ( - char* dt_str, - char* op_str, - f77_char* uploa, - f77_int* m, - f77_int* lda, - f77_int* incx, - f77_int* incy + const char* dt_str, + const char* op_str, + const f77_char* uploa, + const f77_int* m, + const f77_int* lda, + const f77_int* incx, + const f77_int* incy ) { f77_int info = 0; diff --git a/frame/compat/check/bla_hemv_check.h b/frame/compat/check/bla_hemv_check.h index 742abd8cb..e203c8340 100644 --- 
a/frame/compat/check/bla_hemv_check.h +++ b/frame/compat/check/bla_hemv_check.h @@ -36,13 +36,13 @@ void bla_hemv_check ( - char* dt_str, - char* op_str, - f77_char* uploa, - f77_int* m, - f77_int* lda, - f77_int* incx, - f77_int* incy + const char* dt_str, + const char* op_str, + const f77_char* uploa, + const f77_int* m, + const f77_int* lda, + const f77_int* incx, + const f77_int* incy ); #endif diff --git a/frame/compat/check/bla_her2_check.c b/frame/compat/check/bla_her2_check.c index 7b989fbe0..7e1f41fde 100644 --- a/frame/compat/check/bla_her2_check.c +++ b/frame/compat/check/bla_her2_check.c @@ -38,13 +38,13 @@ void bla_her2_check ( - char* dt_str, - char* op_str, - f77_char* uploc, - f77_int* m, - f77_int* incx, - f77_int* incy, - f77_int* lda + const char* dt_str, + const char* op_str, + const f77_char* uploc, + const f77_int* m, + const f77_int* incx, + const f77_int* incy, + const f77_int* lda ) { f77_int info = 0; diff --git a/frame/compat/check/bla_her2_check.h b/frame/compat/check/bla_her2_check.h index 684080768..f90ccae06 100644 --- a/frame/compat/check/bla_her2_check.h +++ b/frame/compat/check/bla_her2_check.h @@ -36,13 +36,13 @@ void bla_her2_check ( - char* dt_str, - char* op_str, - f77_char* uploa, - f77_int* m, - f77_int* incx, - f77_int* incy, - f77_int* lda + const char* dt_str, + const char* op_str, + const f77_char* uploa, + const f77_int* m, + const f77_int* incx, + const f77_int* incy, + const f77_int* lda ); #endif diff --git a/frame/compat/check/bla_her2k_check.c b/frame/compat/check/bla_her2k_check.c index 151c18308..61b8ce7d4 100644 --- a/frame/compat/check/bla_her2k_check.c +++ b/frame/compat/check/bla_her2k_check.c @@ -38,15 +38,15 @@ void bla_her2k_check ( - char* dt_str, - char* op_str, - f77_char* uploa, - f77_char* trans, - f77_int* m, - f77_int* k, - f77_int* lda, - f77_int* ldb, - f77_int* ldc + const char* dt_str, + const char* op_str, + const f77_char* uploa, + const f77_char* trans, + const f77_int* m, + const f77_int* k, 
+ const f77_int* lda, + const f77_int* ldb, + const f77_int* ldc ) { f77_int info = 0; diff --git a/frame/compat/check/bla_her2k_check.h b/frame/compat/check/bla_her2k_check.h index 9b5657481..e5276d215 100644 --- a/frame/compat/check/bla_her2k_check.h +++ b/frame/compat/check/bla_her2k_check.h @@ -36,15 +36,15 @@ void bla_her2k_check ( - char* dt_str, - char* op_str, - f77_char* uploa, - f77_char* transa, - f77_int* m, - f77_int* k, - f77_int* lda, - f77_int* ldb, - f77_int* ldc + const char* dt_str, + const char* op_str, + const f77_char* uploa, + const f77_char* transa, + const f77_int* m, + const f77_int* k, + const f77_int* lda, + const f77_int* ldb, + const f77_int* ldc ); #endif diff --git a/frame/compat/check/bla_her_check.c b/frame/compat/check/bla_her_check.c index 9ebde741c..9a42a71c0 100644 --- a/frame/compat/check/bla_her_check.c +++ b/frame/compat/check/bla_her_check.c @@ -38,12 +38,12 @@ void bla_her_check ( - char* dt_str, - char* op_str, - f77_char* uploc, - f77_int* m, - f77_int* incx, - f77_int* lda + const char* dt_str, + const char* op_str, + const f77_char* uploc, + const f77_int* m, + const f77_int* incx, + const f77_int* lda ) { f77_int info = 0; diff --git a/frame/compat/check/bla_her_check.h b/frame/compat/check/bla_her_check.h index 18a9fbb89..90869e757 100644 --- a/frame/compat/check/bla_her_check.h +++ b/frame/compat/check/bla_her_check.h @@ -36,12 +36,12 @@ void bla_her_check ( - char* dt_str, - char* op_str, - f77_char* uploa, - f77_int* m, - f77_int* incx, - f77_int* lda + const char* dt_str, + const char* op_str, + const f77_char* uploa, + const f77_int* m, + const f77_int* incx, + const f77_int* lda ); #endif diff --git a/frame/compat/check/bla_herk_check.c b/frame/compat/check/bla_herk_check.c index 65195cd88..836c4dcdf 100644 --- a/frame/compat/check/bla_herk_check.c +++ b/frame/compat/check/bla_herk_check.c @@ -38,14 +38,14 @@ void bla_herk_check ( - char* dt_str, - char* op_str, - f77_char* uploc, - f77_char* transa, - f77_int* 
m, - f77_int* k, - f77_int* lda, - f77_int* ldc + const char* dt_str, + const char* op_str, + const f77_char* uploc, + const f77_char* transa, + const f77_int* m, + const f77_int* k, + const f77_int* lda, + const f77_int* ldc ) { f77_int info = 0; diff --git a/frame/compat/check/bla_herk_check.h b/frame/compat/check/bla_herk_check.h index 7abc9cceb..4b518399e 100644 --- a/frame/compat/check/bla_herk_check.h +++ b/frame/compat/check/bla_herk_check.h @@ -36,14 +36,14 @@ void bla_herk_check ( - char* dt_str, - char* op_str, - f77_char* uploc, - f77_char* transa, - f77_int* m, - f77_int* k, - f77_int* lda, - f77_int* ldc + const char* dt_str, + const char* op_str, + const f77_char* uploc, + const f77_char* transa, + const f77_int* m, + const f77_int* k, + const f77_int* lda, + const f77_int* ldc ); #endif diff --git a/frame/compat/check/bla_symm_check.c b/frame/compat/check/bla_symm_check.c index e62540c39..4576a2387 100644 --- a/frame/compat/check/bla_symm_check.c +++ b/frame/compat/check/bla_symm_check.c @@ -38,15 +38,15 @@ void bla_symm_check ( - char* dt_str, - char* op_str, - f77_char* sidea, - f77_char* uploa, - f77_int* m, - f77_int* n, - f77_int* lda, - f77_int* ldb, - f77_int* ldc + const char* dt_str, + const char* op_str, + const f77_char* sidea, + const f77_char* uploa, + const f77_int* m, + const f77_int* n, + const f77_int* lda, + const f77_int* ldb, + const f77_int* ldc ) { bla_hemm_check diff --git a/frame/compat/check/bla_symm_check.h b/frame/compat/check/bla_symm_check.h index 2bf18d085..faf6db548 100644 --- a/frame/compat/check/bla_symm_check.h +++ b/frame/compat/check/bla_symm_check.h @@ -36,15 +36,15 @@ void bla_symm_check ( - char* dt_str, - char* op_str, - f77_char* sidea, - f77_char* uploa, - f77_int* m, - f77_int* n, - f77_int* lda, - f77_int* ldb, - f77_int* ldc + const char* dt_str, + const char* op_str, + const f77_char* sidea, + const f77_char* uploa, + const f77_int* m, + const f77_int* n, + const f77_int* lda, + const f77_int* ldb, + 
const f77_int* ldc ); #endif diff --git a/frame/compat/check/bla_symv_check.c b/frame/compat/check/bla_symv_check.c index 355a16766..25875c234 100644 --- a/frame/compat/check/bla_symv_check.c +++ b/frame/compat/check/bla_symv_check.c @@ -38,13 +38,13 @@ void bla_symv_check ( - char* dt_str, - char* op_str, - f77_char* uploa, - f77_int* m, - f77_int* lda, - f77_int* incx, - f77_int* incy + const char* dt_str, + const char* op_str, + const f77_char* uploa, + const f77_int* m, + const f77_int* lda, + const f77_int* incx, + const f77_int* incy ) { bla_hemv_check diff --git a/frame/compat/check/bla_symv_check.h b/frame/compat/check/bla_symv_check.h index 8149cdf54..8134a84c0 100644 --- a/frame/compat/check/bla_symv_check.h +++ b/frame/compat/check/bla_symv_check.h @@ -36,13 +36,13 @@ void bla_symv_check ( - char* dt_str, - char* op_str, - f77_char* uploa, - f77_int* m, - f77_int* lda, - f77_int* incx, - f77_int* incy + const char* dt_str, + const char* op_str, + const f77_char* uploa, + const f77_int* m, + const f77_int* lda, + const f77_int* incx, + const f77_int* incy ); #endif diff --git a/frame/compat/check/bla_syr2_check.c b/frame/compat/check/bla_syr2_check.c index 82ffcc4e7..0f56b9796 100644 --- a/frame/compat/check/bla_syr2_check.c +++ b/frame/compat/check/bla_syr2_check.c @@ -38,13 +38,13 @@ void bla_syr2_check ( - char* dt_str, - char* op_str, - f77_char* uploc, - f77_int* m, - f77_int* incx, - f77_int* incy, - f77_int* lda + const char* dt_str, + const char* op_str, + const f77_char* uploc, + const f77_int* m, + const f77_int* incx, + const f77_int* incy, + const f77_int* lda ) { bla_her2_check diff --git a/frame/compat/check/bla_syr2_check.h b/frame/compat/check/bla_syr2_check.h index 782a32149..01b8a24b9 100644 --- a/frame/compat/check/bla_syr2_check.h +++ b/frame/compat/check/bla_syr2_check.h @@ -36,13 +36,13 @@ void bla_syr2_check ( - char* dt_str, - char* op_str, - f77_char* uploa, - f77_int* m, - f77_int* incx, - f77_int* incy, - f77_int* lda + const 
char* dt_str, + const char* op_str, + const f77_char* uploa, + const f77_int* m, + const f77_int* incx, + const f77_int* incy, + const f77_int* lda ); #endif diff --git a/frame/compat/check/bla_syr2k_check.c b/frame/compat/check/bla_syr2k_check.c index 78386fbab..f47ed6358 100644 --- a/frame/compat/check/bla_syr2k_check.c +++ b/frame/compat/check/bla_syr2k_check.c @@ -38,15 +38,15 @@ void bla_syr2k_check ( - char* dt_str, - char* op_str, - f77_char* uploa, - f77_char* trans, - f77_int* m, - f77_int* k, - f77_int* lda, - f77_int* ldb, - f77_int* ldc + const char* dt_str, + const char* op_str, + const f77_char* uploa, + const f77_char* trans, + const f77_int* m, + const f77_int* k, + const f77_int* lda, + const f77_int* ldb, + const f77_int* ldc ) { f77_int info = 0; diff --git a/frame/compat/check/bla_syr2k_check.h b/frame/compat/check/bla_syr2k_check.h index d9a3cd0f3..752af0c32 100644 --- a/frame/compat/check/bla_syr2k_check.h +++ b/frame/compat/check/bla_syr2k_check.h @@ -36,15 +36,15 @@ void bla_syr2k_check ( - char* dt_str, - char* op_str, - f77_char* uploa, - f77_char* trans, - f77_int* m, - f77_int* k, - f77_int* lda, - f77_int* ldb, - f77_int* ldc + const char* dt_str, + const char* op_str, + const f77_char* uploa, + const f77_char* trans, + const f77_int* m, + const f77_int* k, + const f77_int* lda, + const f77_int* ldb, + const f77_int* ldc ); #endif diff --git a/frame/compat/check/bla_syr_check.c b/frame/compat/check/bla_syr_check.c index 12647837c..af4a76f12 100644 --- a/frame/compat/check/bla_syr_check.c +++ b/frame/compat/check/bla_syr_check.c @@ -38,12 +38,12 @@ void bla_syr_check ( - char* dt_str, - char* op_str, - f77_char* uploc, - f77_int* m, - f77_int* incx, - f77_int* lda + const char* dt_str, + const char* op_str, + const f77_char* uploc, + const f77_int* m, + const f77_int* incx, + const f77_int* lda ) { bla_her_check diff --git a/frame/compat/check/bla_syr_check.h b/frame/compat/check/bla_syr_check.h index 93d368d11..9c28dd1b1 100644 --- 
a/frame/compat/check/bla_syr_check.h +++ b/frame/compat/check/bla_syr_check.h @@ -36,12 +36,12 @@ void bla_syr_check ( - char* dt_str, - char* op_str, - f77_char* uploa, - f77_int* m, - f77_int* incx, - f77_int* lda + const char* dt_str, + const char* op_str, + const f77_char* uploa, + const f77_int* m, + const f77_int* incx, + const f77_int* lda ); #endif diff --git a/frame/compat/check/bla_syrk_check.c b/frame/compat/check/bla_syrk_check.c index cbc9eb59e..2addb151e 100644 --- a/frame/compat/check/bla_syrk_check.c +++ b/frame/compat/check/bla_syrk_check.c @@ -38,14 +38,14 @@ void bla_syrk_check ( - char* dt_str, - char* op_str, - f77_char* uploc, - f77_char* transa, - f77_int* m, - f77_int* k, - f77_int* lda, - f77_int* ldc + const char* dt_str, + const char* op_str, + const f77_char* uploc, + const f77_char* transa, + const f77_int* m, + const f77_int* k, + const f77_int* lda, + const f77_int* ldc ) { f77_int info = 0; diff --git a/frame/compat/check/bla_syrk_check.h b/frame/compat/check/bla_syrk_check.h index 7ed796e15..c44f68440 100644 --- a/frame/compat/check/bla_syrk_check.h +++ b/frame/compat/check/bla_syrk_check.h @@ -36,14 +36,14 @@ void bla_syrk_check ( - char* dt_str, - char* op_str, - f77_char* uploc, - f77_char* transa, - f77_int* m, - f77_int* k, - f77_int* lda, - f77_int* ldc + const char* dt_str, + const char* op_str, + const f77_char* uploc, + const f77_char* transa, + const f77_int* m, + const f77_int* k, + const f77_int* lda, + const f77_int* ldc ); #endif diff --git a/frame/compat/check/bla_trmm_check.c b/frame/compat/check/bla_trmm_check.c index e5f73f051..6f1568eaf 100644 --- a/frame/compat/check/bla_trmm_check.c +++ b/frame/compat/check/bla_trmm_check.c @@ -38,16 +38,16 @@ void bla_trmm_check ( - char* dt_str, - char* op_str, - f77_char* sidea, - f77_char* uploa, - f77_char* transa, - f77_char* diaga, - f77_int* m, - f77_int* n, - f77_int* lda, - f77_int* ldb + const char* dt_str, + const char* op_str, + const f77_char* sidea, + const 
f77_char* uploa, + const f77_char* transa, + const f77_char* diaga, + const f77_int* m, + const f77_int* n, + const f77_int* lda, + const f77_int* ldb ) { f77_int info = 0; diff --git a/frame/compat/check/bla_trmm_check.h b/frame/compat/check/bla_trmm_check.h index f004c2534..6b68102ef 100644 --- a/frame/compat/check/bla_trmm_check.h +++ b/frame/compat/check/bla_trmm_check.h @@ -36,16 +36,16 @@ void bla_trmm_check ( - char* dt_str, - char* op_str, - f77_char* sidea, - f77_char* uploa, - f77_char* transa, - f77_char* diaga, - f77_int* m, - f77_int* n, - f77_int* lda, - f77_int* ldb + const char* dt_str, + const char* op_str, + const f77_char* sidea, + const f77_char* uploa, + const f77_char* transa, + const f77_char* diaga, + const f77_int* m, + const f77_int* n, + const f77_int* lda, + const f77_int* ldb ); #endif diff --git a/frame/compat/check/bla_trmv_check.c b/frame/compat/check/bla_trmv_check.c index 04ea061ef..9f8a8ba36 100644 --- a/frame/compat/check/bla_trmv_check.c +++ b/frame/compat/check/bla_trmv_check.c @@ -38,14 +38,14 @@ void bla_trmv_check ( - char* dt_str, - char* op_str, - f77_char* uploa, - f77_char* transa, - f77_char* diaga, - f77_int* m, - f77_int* lda, - f77_int* incx + const char* dt_str, + const char* op_str, + const f77_char* uploa, + const f77_char* transa, + const f77_char* diaga, + const f77_int* m, + const f77_int* lda, + const f77_int* incx ) { f77_int info = 0; diff --git a/frame/compat/check/bla_trmv_check.h b/frame/compat/check/bla_trmv_check.h index 81d2be9d5..6ac6fb88f 100644 --- a/frame/compat/check/bla_trmv_check.h +++ b/frame/compat/check/bla_trmv_check.h @@ -36,14 +36,14 @@ void bla_trmv_check ( - char* dt_str, - char* op_str, - f77_char* uploa, - f77_char* transa, - f77_char* diaga, - f77_int* m, - f77_int* lda, - f77_int* incx + const char* dt_str, + const char* op_str, + const f77_char* uploa, + const f77_char* transa, + const f77_char* diaga, + const f77_int* m, + const f77_int* lda, + const f77_int* incx ); #endif diff 
--git a/frame/compat/check/bla_trsm_check.c b/frame/compat/check/bla_trsm_check.c index f3e878c4e..2f200baa7 100644 --- a/frame/compat/check/bla_trsm_check.c +++ b/frame/compat/check/bla_trsm_check.c @@ -38,16 +38,16 @@ void bla_trsm_check ( - char* dt_str, - char* op_str, - f77_char* sidea, - f77_char* uploa, - f77_char* transa, - f77_char* diaga, - f77_int* m, - f77_int* n, - f77_int* lda, - f77_int* ldb + const char* dt_str, + const char* op_str, + const f77_char* sidea, + const f77_char* uploa, + const f77_char* transa, + const f77_char* diaga, + const f77_int* m, + const f77_int* n, + const f77_int* lda, + const f77_int* ldb ) { bla_trmm_check diff --git a/frame/compat/check/bla_trsm_check.h b/frame/compat/check/bla_trsm_check.h index dd45cce05..d5f303951 100644 --- a/frame/compat/check/bla_trsm_check.h +++ b/frame/compat/check/bla_trsm_check.h @@ -36,16 +36,16 @@ void bla_trsm_check ( - char* dt_str, - char* op_str, - f77_char* sidea, - f77_char* uploa, - f77_char* transa, - f77_char* diaga, - f77_int* m, - f77_int* n, - f77_int* lda, - f77_int* ldb + const char* dt_str, + const char* op_str, + const f77_char* sidea, + const f77_char* uploa, + const f77_char* transa, + const f77_char* diaga, + const f77_int* m, + const f77_int* n, + const f77_int* lda, + const f77_int* ldb ); #endif diff --git a/frame/compat/check/bla_trsv_check.c b/frame/compat/check/bla_trsv_check.c index 913ac502d..0b6b66062 100644 --- a/frame/compat/check/bla_trsv_check.c +++ b/frame/compat/check/bla_trsv_check.c @@ -38,14 +38,14 @@ void bla_trsv_check ( - char* dt_str, - char* op_str, - f77_char* uploa, - f77_char* transa, - f77_char* diaga, - f77_int* m, - f77_int* lda, - f77_int* incx + const char* dt_str, + const char* op_str, + const f77_char* uploa, + const f77_char* transa, + const f77_char* diaga, + const f77_int* m, + const f77_int* lda, + const f77_int* incx ) { bla_trmv_check diff --git a/frame/compat/check/bla_trsv_check.h b/frame/compat/check/bla_trsv_check.h index 
42cd00689..a4e9e7d6c 100644 --- a/frame/compat/check/bla_trsv_check.h +++ b/frame/compat/check/bla_trsv_check.h @@ -36,14 +36,14 @@ void bla_trsv_check ( - char* dt_str, - char* op_str, - f77_char* uploa, - f77_char* transa, - f77_char* diaga, - f77_int* m, - f77_int* lda, - f77_int* incx + const char* dt_str, + const char* op_str, + const f77_char* uploa, + const f77_char* transa, + const f77_char* diaga, + const f77_int* m, + const f77_int* lda, + const f77_int* incx ); #endif diff --git a/frame/compat/f2c/bla_gbmv.c b/frame/compat/f2c/bla_gbmv.c index 0c66786d7..d3db8a618 100644 --- a/frame/compat/f2c/bla_gbmv.c +++ b/frame/compat/f2c/bla_gbmv.c @@ -41,22 +41,22 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(c,gbmv)(bla_character *trans, bla_integer *m, bla_integer *n, bla_integer *kl, bla_integer *ku, bla_scomplex *alpha, bla_scomplex *a, bla_integer *lda, bla_scomplex *x, bla_integer *incx, bla_scomplex *beta, bla_scomplex *y, bla_integer *incy) +/* Subroutine */ int PASTEF77(c,gbmv)(const bla_character *trans, const bla_integer *m, const bla_integer *n, const bla_integer *kl, const bla_integer *ku, const bla_scomplex *alpha, const bla_scomplex *a, const bla_integer *lda, const bla_scomplex *x, const bla_integer *incx, const bla_scomplex *beta, bla_scomplex *y, const bla_integer *incy) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; bla_scomplex q__1, q__2, q__3; /* Builtin functions */ - void bla_r_cnjg(bla_scomplex *, bla_scomplex *); + //void bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_integer info; bla_scomplex temp; bla_integer lenx, leny, i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer ix, iy, jx, jy, kx, ky; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* 
Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj; bla_integer kup1; @@ -482,7 +482,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(d,gbmv)(bla_character *trans, bla_integer *m, bla_integer *n, bla_integer *kl, bla_integer *ku, bla_double *alpha, bla_double *a, bla_integer *lda, bla_double *x, bla_integer *incx, bla_double *beta, bla_double *y, bla_integer *incy) +/* Subroutine */ int PASTEF77(d,gbmv)(const bla_character *trans, const bla_integer *m, const bla_integer *n, const bla_integer *kl, const bla_integer *ku, const bla_double *alpha, const bla_double *a, const bla_integer *lda, const bla_double *x, const bla_integer *incx, const bla_double *beta, bla_double *y, const bla_integer *incy) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; @@ -491,9 +491,9 @@ bla_integer info; bla_double temp; bla_integer lenx, leny, i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer ix, iy, jx, jy, kx, ky; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_integer kup1; /* .. Scalar Arguments .. 
*/ @@ -838,7 +838,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(s,gbmv)(bla_character *trans, bla_integer *m, bla_integer *n, bla_integer *kl, bla_integer *ku, bla_real *alpha, bla_real *a, bla_integer *lda, bla_real *x, bla_integer * incx, bla_real *beta, bla_real *y, bla_integer *incy) +/* Subroutine */ int PASTEF77(s,gbmv)(const bla_character *trans, const bla_integer *m, const bla_integer *n, const bla_integer *kl, const bla_integer *ku, const bla_real *alpha, const bla_real *a, const bla_integer *lda, const bla_real *x, const bla_integer * incx, const bla_real *beta, bla_real *y, const bla_integer *incy) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; @@ -847,9 +847,9 @@ bla_integer info; bla_real temp; bla_integer lenx, leny, i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer ix, iy, jx, jy, kx, ky; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_integer kup1; /* .. Scalar Arguments .. 
*/ @@ -1194,22 +1194,22 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(z,gbmv)(bla_character *trans, bla_integer *m, bla_integer *n, bla_integer *kl, bla_integer *ku, bla_dcomplex *alpha, bla_dcomplex *a, bla_integer *lda, bla_dcomplex *x, bla_integer *incx, bla_dcomplex *beta, bla_dcomplex * y, bla_integer *incy) +/* Subroutine */ int PASTEF77(z,gbmv)(const bla_character *trans, const bla_integer *m, const bla_integer *n, const bla_integer *kl, const bla_integer *ku, const bla_dcomplex *alpha, const bla_dcomplex *a, const bla_integer *lda, const bla_dcomplex *x, const bla_integer *incx, const bla_dcomplex *beta, bla_dcomplex * y, const bla_integer *incy) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6; bla_dcomplex z__1, z__2, z__3; /* Builtin functions */ - void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); + //void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp; bla_integer lenx, leny, i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer ix, iy, jx, jy, kx, ky; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj; bla_integer kup1; diff --git a/frame/compat/f2c/bla_gbmv.h b/frame/compat/f2c/bla_gbmv.h index ec2bffe17..a060a0438 100644 --- a/frame/compat/f2c/bla_gbmv.h +++ b/frame/compat/f2c/bla_gbmv.h @@ -34,9 +34,9 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -int PASTEF77(c,gbmv)(bla_character *trans, bla_integer *m, bla_integer *n, bla_integer *kl, bla_integer *ku, bla_scomplex *alpha, bla_scomplex *a, bla_integer *lda, bla_scomplex *x, bla_integer *incx, bla_scomplex *beta, bla_scomplex *y, bla_integer *incy); -int PASTEF77(d,gbmv)(bla_character *trans, bla_integer 
*m, bla_integer *n, bla_integer *kl, bla_integer *ku, bla_double *alpha, bla_double *a, bla_integer *lda, bla_double *x, bla_integer *incx, bla_double *beta, bla_double *y, bla_integer *incy); -int PASTEF77(s,gbmv)(bla_character *trans, bla_integer *m, bla_integer *n, bla_integer *kl, bla_integer *ku, bla_real *alpha, bla_real *a, bla_integer *lda, bla_real *x, bla_integer * incx, bla_real *beta, bla_real *y, bla_integer *incy); -int PASTEF77(z,gbmv)(bla_character *trans, bla_integer *m, bla_integer *n, bla_integer *kl, bla_integer *ku, bla_dcomplex *alpha, bla_dcomplex *a, bla_integer *lda, bla_dcomplex *x, bla_integer *incx, bla_dcomplex *beta, bla_dcomplex * y, bla_integer *incy); +int PASTEF77(c,gbmv)(const bla_character *trans, const bla_integer *m, const bla_integer *n, const bla_integer *kl, const bla_integer *ku, const bla_scomplex *alpha, const bla_scomplex *a, const bla_integer *lda, const bla_scomplex *x, const bla_integer *incx, const bla_scomplex *beta, bla_scomplex *y, const bla_integer *incy); +int PASTEF77(d,gbmv)(const bla_character *trans, const bla_integer *m, const bla_integer *n, const bla_integer *kl, const bla_integer *ku, const bla_double *alpha, const bla_double *a, const bla_integer *lda, const bla_double *x, const bla_integer *incx, const bla_double *beta, bla_double *y, const bla_integer *incy); +int PASTEF77(s,gbmv)(const bla_character *trans, const bla_integer *m, const bla_integer *n, const bla_integer *kl, const bla_integer *ku, const bla_real *alpha, const bla_real *a, const bla_integer *lda, const bla_real *x, const bla_integer * incx, const bla_real *beta, bla_real *y, const bla_integer *incy); +int PASTEF77(z,gbmv)(const bla_character *trans, const bla_integer *m, const bla_integer *n, const bla_integer *kl, const bla_integer *ku, const bla_dcomplex *alpha, const bla_dcomplex *a, const bla_integer *lda, const bla_dcomplex *x, const bla_integer *incx, const bla_dcomplex *beta, bla_dcomplex * y, const bla_integer *incy); #endif 
diff --git a/frame/compat/f2c/bla_hbmv.c b/frame/compat/f2c/bla_hbmv.c index 33ee628fb..ca32c5e06 100644 --- a/frame/compat/f2c/bla_hbmv.c +++ b/frame/compat/f2c/bla_hbmv.c @@ -41,7 +41,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(c,hbmv)(bla_character *uplo, bla_integer *n, bla_integer *k, bla_scomplex * alpha, bla_scomplex *a, bla_integer *lda, bla_scomplex *x, bla_integer *incx, bla_scomplex *beta, bla_scomplex *y, bla_integer *incy) +/* Subroutine */ int PASTEF77(c,hbmv)(const bla_character *uplo, const bla_integer *n, const bla_integer *k, const bla_scomplex * alpha, const bla_scomplex *a, const bla_integer *lda, const bla_scomplex *x, const bla_integer *incx, const bla_scomplex *beta, bla_scomplex *y, const bla_integer *incy) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; @@ -49,15 +49,15 @@ bla_scomplex q__1, q__2, q__3, q__4; /* Builtin functions */ - void bla_r_cnjg(bla_scomplex *, bla_scomplex *); + //void bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_integer info; bla_scomplex temp1, temp2; bla_integer i__, j, l; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, iy, jx, jy, kx, ky; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. 
*/ @@ -487,7 +487,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(z,hbmv)(bla_character *uplo, bla_integer *n, bla_integer *k, bla_dcomplex *alpha, bla_dcomplex *a, bla_integer *lda, bla_dcomplex *x, bla_integer * incx, bla_dcomplex *beta, bla_dcomplex *y, bla_integer *incy) +/* Subroutine */ int PASTEF77(z,hbmv)(const bla_character *uplo, const bla_integer *n, const bla_integer *k, const bla_dcomplex *alpha, const bla_dcomplex *a, const bla_integer *lda, const bla_dcomplex *x, const bla_integer * incx, const bla_dcomplex *beta, bla_dcomplex *y, const bla_integer *incy) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; @@ -495,15 +495,15 @@ bla_dcomplex z__1, z__2, z__3, z__4; /* Builtin functions */ - void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); + //void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp1, temp2; bla_integer i__, j, l; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, iy, jx, jy, kx, ky; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. 
*/ diff --git a/frame/compat/f2c/bla_hbmv.h b/frame/compat/f2c/bla_hbmv.h index 406e7d1a9..b43ac9c08 100644 --- a/frame/compat/f2c/bla_hbmv.h +++ b/frame/compat/f2c/bla_hbmv.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -int PASTEF77(c,hbmv)(bla_character *uplo, bla_integer *n, bla_integer *k, bla_scomplex * alpha, bla_scomplex *a, bla_integer *lda, bla_scomplex *x, bla_integer *incx, bla_scomplex *beta, bla_scomplex *y, bla_integer *incy); -int PASTEF77(z,hbmv)(bla_character *uplo, bla_integer *n, bla_integer *k, bla_dcomplex *alpha, bla_dcomplex *a, bla_integer *lda, bla_dcomplex *x, bla_integer * incx, bla_dcomplex *beta, bla_dcomplex *y, bla_integer *incy); +int PASTEF77(c,hbmv)(const bla_character *uplo, const bla_integer *n, const bla_integer *k, const bla_scomplex *alpha, const bla_scomplex *a, const bla_integer *lda, const bla_scomplex *x, const bla_integer *incx, const bla_scomplex *beta, bla_scomplex *y, const bla_integer *incy); +int PASTEF77(z,hbmv)(const bla_character *uplo, const bla_integer *n, const bla_integer *k, const bla_dcomplex *alpha, const bla_dcomplex *a, const bla_integer *lda, const bla_dcomplex *x, const bla_integer *incx, const bla_dcomplex *beta, bla_dcomplex *y, const bla_integer *incy); #endif diff --git a/frame/compat/f2c/bla_hpmv.c b/frame/compat/f2c/bla_hpmv.c index e43508fd6..8914db754 100644 --- a/frame/compat/f2c/bla_hpmv.c +++ b/frame/compat/f2c/bla_hpmv.c @@ -41,7 +41,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(c,hpmv)(bla_character *uplo, bla_integer *n, bla_scomplex *alpha, bla_scomplex * ap, bla_scomplex *x, bla_integer *incx, bla_scomplex *beta, bla_scomplex *y, bla_integer *incy) +/* Subroutine */ int PASTEF77(c,hpmv)(const bla_character *uplo, const bla_integer *n, const bla_scomplex *alpha, const bla_scomplex * ap, const bla_scomplex *x, const bla_integer *incx, const bla_scomplex *beta, bla_scomplex *y, const bla_integer *incy) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, 
i__5; @@ -49,15 +49,15 @@ bla_scomplex q__1, q__2, q__3, q__4; /* Builtin functions */ - void bla_r_cnjg(bla_scomplex *, bla_scomplex *); + //void bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_integer info; bla_scomplex temp1, temp2; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, iy, jx, jy, kx, ky; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ @@ -439,7 +439,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(z,hpmv)(bla_character *uplo, bla_integer *n, bla_dcomplex *alpha, bla_dcomplex *ap, bla_dcomplex *x, bla_integer *incx, bla_dcomplex *beta, bla_dcomplex *y, bla_integer *incy) +/* Subroutine */ int PASTEF77(z,hpmv)(const bla_character *uplo, const bla_integer *n, const bla_dcomplex *alpha, const bla_dcomplex *ap, const bla_dcomplex *x, const bla_integer *incx, const bla_dcomplex *beta, bla_dcomplex *y, const bla_integer *incy) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5; @@ -447,15 +447,15 @@ bla_dcomplex z__1, z__2, z__3, z__4; /* Builtin functions */ - void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); + //void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp1, temp2; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, iy, jx, jy, kx, ky; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); 
/* .. Scalar Arguments .. */ /* .. Array Arguments .. */ diff --git a/frame/compat/f2c/bla_hpmv.h b/frame/compat/f2c/bla_hpmv.h index 0878c8e4c..2adf446c9 100644 --- a/frame/compat/f2c/bla_hpmv.h +++ b/frame/compat/f2c/bla_hpmv.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -int PASTEF77(c,hpmv)(bla_character *uplo, bla_integer *n, bla_scomplex *alpha, bla_scomplex * ap, bla_scomplex *x, bla_integer *incx, bla_scomplex *beta, bla_scomplex *y, bla_integer *incy); -int PASTEF77(z,hpmv)(bla_character *uplo, bla_integer *n, bla_dcomplex *alpha, bla_dcomplex *ap, bla_dcomplex *x, bla_integer *incx, bla_dcomplex *beta, bla_dcomplex *y, bla_integer *incy); +int PASTEF77(c,hpmv)(const bla_character *uplo, const bla_integer *n, const bla_scomplex *alpha, const bla_scomplex *ap, const bla_scomplex *x, const bla_integer *incx, const bla_scomplex *beta, bla_scomplex *y, const bla_integer *incy); +int PASTEF77(z,hpmv)(const bla_character *uplo, const bla_integer *n, const bla_dcomplex *alpha, const bla_dcomplex *ap, const bla_dcomplex *x, const bla_integer *incx, const bla_dcomplex *beta, bla_dcomplex *y, const bla_integer *incy); #endif diff --git a/frame/compat/f2c/bla_hpr.c b/frame/compat/f2c/bla_hpr.c index 7d110170c..cdf3f016f 100644 --- a/frame/compat/f2c/bla_hpr.c +++ b/frame/compat/f2c/bla_hpr.c @@ -41,7 +41,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(c,hpr)(bla_character *uplo, bla_integer *n, bla_real *alpha, bla_scomplex *x, bla_integer *incx, bla_scomplex *ap) +/* Subroutine */ int PASTEF77(c,hpr)(const bla_character *uplo, const bla_integer *n, const bla_real *alpha, const bla_scomplex *x, const bla_integer *incx, bla_scomplex *ap) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5; @@ -49,15 +49,15 @@ bla_scomplex q__1, q__2; /* Builtin functions */ - void bla_r_cnjg(bla_scomplex *, bla_scomplex *); + //void bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_integer info; bla_scomplex temp; 
bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ @@ -353,7 +353,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(z,hpr)(bla_character *uplo, bla_integer *n, bla_double *alpha, bla_dcomplex *x, bla_integer *incx, bla_dcomplex *ap) +/* Subroutine */ int PASTEF77(z,hpr)(const bla_character *uplo, const bla_integer *n, const bla_double *alpha, const bla_dcomplex *x, const bla_integer *incx, bla_dcomplex *ap) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5; @@ -361,15 +361,15 @@ bla_dcomplex z__1, z__2; /* Builtin functions */ - void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); + //void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. 
*/ diff --git a/frame/compat/f2c/bla_hpr.h b/frame/compat/f2c/bla_hpr.h index 036538f21..448c04bc0 100644 --- a/frame/compat/f2c/bla_hpr.h +++ b/frame/compat/f2c/bla_hpr.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -int PASTEF77(c,hpr)(bla_character *uplo, bla_integer *n, bla_real *alpha, bla_scomplex *x, bla_integer *incx, bla_scomplex *ap); -int PASTEF77(z,hpr)(bla_character *uplo, bla_integer *n, bla_double *alpha, bla_dcomplex *x, bla_integer *incx, bla_dcomplex *ap); +int PASTEF77(c,hpr)(const bla_character *uplo, const bla_integer *n, const bla_real *alpha, const bla_scomplex *x, const bla_integer *incx, bla_scomplex *ap); +int PASTEF77(z,hpr)(const bla_character *uplo, const bla_integer *n, const bla_double *alpha, const bla_dcomplex *x, const bla_integer *incx, bla_dcomplex *ap); #endif diff --git a/frame/compat/f2c/bla_hpr2.c b/frame/compat/f2c/bla_hpr2.c index c1563fc56..432a67b4b 100644 --- a/frame/compat/f2c/bla_hpr2.c +++ b/frame/compat/f2c/bla_hpr2.c @@ -41,7 +41,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(c,hpr2)(bla_character *uplo, bla_integer *n, bla_scomplex *alpha, bla_scomplex *x, bla_integer *incx, bla_scomplex *y, bla_integer *incy, bla_scomplex *ap) +/* Subroutine */ int PASTEF77(c,hpr2)(const bla_character *uplo, const bla_integer *n, const bla_scomplex *alpha, const bla_scomplex *x, const bla_integer *incx, const bla_scomplex *y, const bla_integer *incy, bla_scomplex *ap) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5, i__6; @@ -49,15 +49,15 @@ bla_scomplex q__1, q__2, q__3, q__4; /* Builtin functions */ - void bla_r_cnjg(bla_scomplex *, bla_scomplex *); + //void bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_integer info; bla_scomplex temp1, temp2; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, 
ix, iy, jx = 0, jy = 0, kx = 0, ky = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ @@ -429,7 +429,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(z,hpr2)(bla_character *uplo, bla_integer *n, bla_dcomplex *alpha, bla_dcomplex *x, bla_integer *incx, bla_dcomplex *y, bla_integer *incy, bla_dcomplex *ap) +/* Subroutine */ int PASTEF77(z,hpr2)(const bla_character *uplo, const bla_integer *n, const bla_dcomplex *alpha, const bla_dcomplex *x, const bla_integer *incx, const bla_dcomplex *y, const bla_integer *incy, bla_dcomplex *ap) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5, i__6; @@ -437,15 +437,15 @@ bla_dcomplex z__1, z__2, z__3, z__4; /* Builtin functions */ - void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); + //void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp1, temp2; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, iy, jx = 0, jy = 0, kx = 0, ky = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. 
*/ diff --git a/frame/compat/f2c/bla_hpr2.h b/frame/compat/f2c/bla_hpr2.h index 0b1e254b7..8e7db190c 100644 --- a/frame/compat/f2c/bla_hpr2.h +++ b/frame/compat/f2c/bla_hpr2.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -int PASTEF77(c,hpr2)(bla_character *uplo, bla_integer *n, bla_scomplex *alpha, bla_scomplex *x, bla_integer *incx, bla_scomplex *y, bla_integer *incy, bla_scomplex *ap); -int PASTEF77(z,hpr2)(bla_character *uplo, bla_integer *n, bla_dcomplex *alpha, bla_dcomplex *x, bla_integer *incx, bla_dcomplex *y, bla_integer *incy, bla_dcomplex *ap); +int PASTEF77(c,hpr2)(const bla_character *uplo, const bla_integer *n, const bla_scomplex *alpha, const bla_scomplex *x, const bla_integer *incx, const bla_scomplex *y, const bla_integer *incy, bla_scomplex *ap); +int PASTEF77(z,hpr2)(const bla_character *uplo, const bla_integer *n, const bla_dcomplex *alpha, const bla_dcomplex *x, const bla_integer *incx, const bla_dcomplex *y, const bla_integer *incy, bla_dcomplex *ap); #endif diff --git a/frame/compat/f2c/bla_lsame.c b/frame/compat/f2c/bla_lsame.c index 9bd91bdc6..04f8caad0 100644 --- a/frame/compat/f2c/bla_lsame.c +++ b/frame/compat/f2c/bla_lsame.c @@ -41,7 +41,7 @@ -lf2c -lm (in that order) */ -bla_logical PASTEF770(lsame)(bla_character *ca, bla_character *cb, ftnlen ca_len, ftnlen cb_len) +bla_logical PASTEF770(lsame)(const bla_character *ca, const bla_character *cb, ftnlen ca_len, ftnlen cb_len) { /* System generated locals */ bla_logical ret_val; diff --git a/frame/compat/f2c/bla_lsame.h b/frame/compat/f2c/bla_lsame.h index 0b5eb175e..7e2f92389 100644 --- a/frame/compat/f2c/bla_lsame.h +++ b/frame/compat/f2c/bla_lsame.h @@ -34,6 +34,6 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -bla_logical PASTEF770(lsame)(bla_character *ca, bla_character *cb, ftnlen ca_len, ftnlen cb_len); +bla_logical PASTEF770(lsame)(const bla_character *ca, const bla_character *cb, ftnlen ca_len, ftnlen cb_len); #endif diff --git a/frame/compat/f2c/bla_rot.c b/frame/compat/f2c/bla_rot.c index 
18618ee5e..4b291cc9c 100644 --- a/frame/compat/f2c/bla_rot.c +++ b/frame/compat/f2c/bla_rot.c @@ -41,7 +41,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(s,rot)(bla_integer *n, bla_real *sx, bla_integer *incx, bla_real *sy, bla_integer *incy, bla_real *c__, bla_real *s) +/* Subroutine */ int PASTEF77(s,rot)(const bla_integer *n, bla_real *sx, const bla_integer *incx, bla_real *sy, const bla_integer *incy, const bla_real *c__, const bla_real *s) { /* System generated locals */ bla_integer i__1; @@ -109,7 +109,7 @@ L20: -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(d,rot)(bla_integer *n, bla_double *dx, bla_integer *incx, bla_double *dy, bla_integer *incy, bla_double *c__, bla_double *s) +/* Subroutine */ int PASTEF77(d,rot)(const bla_integer *n, bla_double *dx, const bla_integer *incx, bla_double *dy, const bla_integer *incy, const bla_double *c__, const bla_double *s) { /* System generated locals */ bla_integer i__1; @@ -177,7 +177,7 @@ L20: -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(cs,rot)(bla_integer *n, bla_scomplex *cx, bla_integer *incx, bla_scomplex *cy, bla_integer *incy, bla_real *c__, bla_real *s) +/* Subroutine */ int PASTEF77(cs,rot)(const bla_integer *n, bla_scomplex *cx, const bla_integer *incx, bla_scomplex *cy, const bla_integer *incy, const bla_real *c__, const bla_real *s) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4; @@ -270,7 +270,7 @@ L20: -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(zd,rot)(bla_integer *n, bla_dcomplex *zx, bla_integer *incx, bla_dcomplex *zy, bla_integer *incy, bla_double *c__, bla_double *s) +/* Subroutine */ int PASTEF77(zd,rot)(const bla_integer *n, bla_dcomplex *zx, const bla_integer *incx, bla_dcomplex *zy, const bla_integer *incy, const bla_double *c__, const bla_double *s) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4; diff --git a/frame/compat/f2c/bla_rot.h b/frame/compat/f2c/bla_rot.h index f55b5492b..fd8d8991b 
100644 --- a/frame/compat/f2c/bla_rot.h +++ b/frame/compat/f2c/bla_rot.h @@ -34,9 +34,9 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -int PASTEF77(s,rot)(bla_integer *n, bla_real *sx, bla_integer *incx, bla_real *sy, bla_integer *incy, bla_real *c__, bla_real *s); -int PASTEF77(d,rot)(bla_integer *n, bla_double *dx, bla_integer *incx, bla_double *dy, bla_integer *incy, bla_double *c__, bla_double *s); -int PASTEF77(cs,rot)(bla_integer *n, bla_scomplex *cx, bla_integer *incx, bla_scomplex *cy, bla_integer *incy, bla_real *c__, bla_real *s); -int PASTEF77(zd,rot)(bla_integer *n, bla_dcomplex *zx, bla_integer *incx, bla_dcomplex *zy, bla_integer *incy, bla_double *c__, bla_double *s); +int PASTEF77(s,rot)(const bla_integer *n, bla_real *sx, const bla_integer *incx, bla_real *sy, const bla_integer *incy, const bla_real *c__, const bla_real *s); +int PASTEF77(d,rot)(const bla_integer *n, bla_double *dx, const bla_integer *incx, bla_double *dy, const bla_integer *incy, const bla_double *c__, const bla_double *s); +int PASTEF77(cs,rot)(const bla_integer *n, bla_scomplex *cx, const bla_integer *incx, bla_scomplex *cy, const bla_integer *incy, const bla_real *c__, const bla_real *s); +int PASTEF77(zd,rot)(const bla_integer *n, bla_dcomplex *zx, const bla_integer *incx, bla_dcomplex *zy, const bla_integer *incy, const bla_double *c__, const bla_double *s); #endif diff --git a/frame/compat/f2c/bla_rotg.c b/frame/compat/f2c/bla_rotg.c index 490b01a59..ff6cc239d 100644 --- a/frame/compat/f2c/bla_rotg.c +++ b/frame/compat/f2c/bla_rotg.c @@ -51,7 +51,7 @@ static bla_real sc_b4 = 1.f; bla_real r__1, r__2; /* Builtin functions */ - double sqrt(bla_double), bla_r_sign(bla_real *, bla_real *); + //double sqrt(bla_double), bla_r_sign(bla_real *, bla_real *); /* Local variables */ bla_real r__, scale, z__, roe; @@ -111,7 +111,7 @@ static bla_double dc_b4 = 1.; bla_double d__1, d__2; /* Builtin functions */ - double sqrt(bla_double), bla_d_sign(bla_double *, bla_double *); + //double 
sqrt(bla_double), bla_d_sign(bla_double *, bla_double *); /* Local variables */ bla_double r__, scale, z__, roe; @@ -168,8 +168,8 @@ L20: bla_scomplex q__1, q__2, q__3; /* Builtin functions */ - double bla_c_abs(bla_scomplex *), sqrt(bla_double); - void bla_r_cnjg(bla_scomplex *, bla_scomplex *); + //double bla_c_abs(bla_scomplex *), sqrt(bla_double); + //void bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_real norm; @@ -218,10 +218,10 @@ L20: bla_dcomplex z__1, z__2, z__3, z__4; /* Builtin functions */ - double bla_z_abs(bla_dcomplex *); - void bla_z_div(bla_dcomplex *, bla_dcomplex *, bla_dcomplex *); - double sqrt(bla_double); - void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); + //double bla_z_abs(bla_dcomplex *); + //void bla_z_div(bla_dcomplex *, bla_dcomplex *, bla_dcomplex *); + //double sqrt(bla_double); + //void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_double norm; diff --git a/frame/compat/f2c/bla_rotm.c b/frame/compat/f2c/bla_rotm.c index 240c42bb7..db40cbbff 100644 --- a/frame/compat/f2c/bla_rotm.c +++ b/frame/compat/f2c/bla_rotm.c @@ -41,7 +41,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(s,rotm)(bla_integer *n, bla_real *sx, bla_integer *incx, bla_real *sy, bla_integer *incy, bla_real *sparam) +/* Subroutine */ int PASTEF77(s,rotm)(const bla_integer *n, bla_real *sx, const bla_integer *incx, bla_real *sy, const bla_integer *incy, const bla_real *sparam) { /* Initialized data */ @@ -207,7 +207,7 @@ L140: -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(d,rotm)(bla_integer *n, bla_double *dx, bla_integer *incx, bla_double *dy, bla_integer *incy, bla_double *dparam) +/* Subroutine */ int PASTEF77(d,rotm)(const bla_integer *n, bla_double *dx, const bla_integer *incx, bla_double *dy, const bla_integer *incy, const bla_double *dparam) { /* Initialized data */ diff --git a/frame/compat/f2c/bla_rotm.h b/frame/compat/f2c/bla_rotm.h index 28cf8ec25..b8fc07453 100644 --- 
a/frame/compat/f2c/bla_rotm.h +++ b/frame/compat/f2c/bla_rotm.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -int PASTEF77(s,rotm)(bla_integer *n, bla_real *sx, bla_integer *incx, bla_real *sy, bla_integer *incy, bla_real *sparam); -int PASTEF77(d,rotm)(bla_integer *n, bla_double *dx, bla_integer *incx, bla_double *dy, bla_integer *incy, bla_double *dparam); +int PASTEF77(s,rotm)(const bla_integer *n, bla_real *sx, const bla_integer *incx, bla_real *sy, const bla_integer *incy, const bla_real *sparam); +int PASTEF77(d,rotm)(const bla_integer *n, bla_double *dx, const bla_integer *incx, bla_double *dy, const bla_integer *incy, const bla_double *dparam); #endif diff --git a/frame/compat/f2c/bla_rotmg.c b/frame/compat/f2c/bla_rotmg.c index b3c734628..46bd149d2 100644 --- a/frame/compat/f2c/bla_rotmg.c +++ b/frame/compat/f2c/bla_rotmg.c @@ -41,7 +41,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(s,rotmg)(bla_real *sd1, bla_real *sd2, bla_real *sx1, bla_real *sy1, bla_real *sparam) +/* Subroutine */ int PASTEF77(s,rotmg)(bla_real *sd1, bla_real *sd2, bla_real *sx1, const bla_real *sy1, bla_real *sparam) { /* Initialized data */ @@ -281,7 +281,7 @@ L260: -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(d,rotmg)(bla_double *dd1, bla_double *dd2, bla_double *dx1, bla_double *dy1, bla_double *dparam) +/* Subroutine */ int PASTEF77(d,rotmg)(bla_double *dd1, bla_double *dd2, bla_double *dx1, const bla_double *dy1, bla_double *dparam) { /* Initialized data */ diff --git a/frame/compat/f2c/bla_rotmg.h b/frame/compat/f2c/bla_rotmg.h index e6cccf8b7..512d46562 100644 --- a/frame/compat/f2c/bla_rotmg.h +++ b/frame/compat/f2c/bla_rotmg.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -int PASTEF77(s,rotmg)(bla_real *sd1, bla_real *sd2, bla_real *sx1, bla_real *sy1, bla_real *sparam); -int PASTEF77(d,rotmg)(bla_double *dd1, bla_double *dd2, bla_double *dx1, bla_double *dy1, bla_double *dparam); +int PASTEF77(s,rotmg)(bla_real *sd1, bla_real *sd2, 
bla_real *sx1, const bla_real *sy1, bla_real *sparam); +int PASTEF77(d,rotmg)(bla_double *dd1, bla_double *dd2, bla_double *dx1, const bla_double *dy1, bla_double *dparam); #endif diff --git a/frame/compat/f2c/bla_sbmv.c b/frame/compat/f2c/bla_sbmv.c index 421233d0f..7f2cf4beb 100644 --- a/frame/compat/f2c/bla_sbmv.c +++ b/frame/compat/f2c/bla_sbmv.c @@ -41,7 +41,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(d,sbmv)(bla_character *uplo, bla_integer *n, bla_integer *k, bla_double *alpha, bla_double *a, bla_integer *lda, bla_double *x, bla_integer *incx, bla_double *beta, bla_double *y, bla_integer *incy) +/* Subroutine */ int PASTEF77(d,sbmv)(const bla_character *uplo, const bla_integer *n, const bla_integer *k, const bla_double *alpha, const bla_double *a, const bla_integer *lda, const bla_double *x, const bla_integer *incx, const bla_double *beta, bla_double *y, const bla_integer *incy) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4; @@ -50,9 +50,9 @@ bla_integer info; bla_double temp1, temp2; bla_integer i__, j, l; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, iy, jx, jy, kx, ky; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. 
*/ @@ -392,7 +392,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(s,sbmv)(bla_character *uplo, bla_integer *n, bla_integer *k, bla_real *alpha, bla_real *a, bla_integer *lda, bla_real *x, bla_integer *incx, bla_real *beta, bla_real *y, bla_integer *incy) +/* Subroutine */ int PASTEF77(s,sbmv)(const bla_character *uplo, const bla_integer *n, const bla_integer *k, const bla_real *alpha, const bla_real *a, const bla_integer *lda, const bla_real *x, const bla_integer *incx, const bla_real *beta, bla_real *y, const bla_integer *incy) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4; @@ -401,9 +401,9 @@ bla_integer info; bla_real temp1, temp2; bla_integer i__, j, l; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, iy, jx, jy, kx, ky; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. 
*/ diff --git a/frame/compat/f2c/bla_sbmv.h b/frame/compat/f2c/bla_sbmv.h index 16f0dbb37..8258ac4a8 100644 --- a/frame/compat/f2c/bla_sbmv.h +++ b/frame/compat/f2c/bla_sbmv.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -int PASTEF77(d,sbmv)(bla_character *uplo, bla_integer *n, bla_integer *k, bla_double *alpha, bla_double *a, bla_integer *lda, bla_double *x, bla_integer *incx, bla_double *beta, bla_double *y, bla_integer *incy); -int PASTEF77(s,sbmv)(bla_character *uplo, bla_integer *n, bla_integer *k, bla_real *alpha, bla_real *a, bla_integer *lda, bla_real *x, bla_integer *incx, bla_real *beta, bla_real *y, bla_integer *incy); +int PASTEF77(d,sbmv)(const bla_character *uplo, const bla_integer *n, const bla_integer *k, const bla_double *alpha, const bla_double *a, const bla_integer *lda, const bla_double *x, const bla_integer *incx, const bla_double *beta, bla_double *y, const bla_integer *incy); +int PASTEF77(s,sbmv)(const bla_character *uplo, const bla_integer *n, const bla_integer *k, const bla_real *alpha, const bla_real *a, const bla_integer *lda, const bla_real *x, const bla_integer *incx, const bla_real *beta, bla_real *y, const bla_integer *incy); #endif diff --git a/frame/compat/f2c/bla_spmv.c b/frame/compat/f2c/bla_spmv.c index 360124203..b0c1f7f8a 100644 --- a/frame/compat/f2c/bla_spmv.c +++ b/frame/compat/f2c/bla_spmv.c @@ -41,7 +41,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(d,spmv)(bla_character *uplo, bla_integer *n, bla_double *alpha, bla_double *ap, bla_double *x, bla_integer *incx, bla_double *beta, bla_double *y, bla_integer *incy) +/* Subroutine */ int PASTEF77(d,spmv)(const bla_character *uplo, const bla_integer *n, const bla_double *alpha, const bla_double *ap, const bla_double *x, const bla_integer *incx, const bla_double *beta, bla_double *y, const bla_integer *incy) { /* System generated locals */ bla_integer i__1, i__2; @@ -50,9 +50,9 @@ bla_integer info; bla_double temp1, temp2; bla_integer i__, j, k; - extern 
bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, iy, jx, jy, kx, ky; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ @@ -342,7 +342,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(s,spmv)(bla_character *uplo, bla_integer *n, bla_real *alpha, bla_real *ap, bla_real *x, bla_integer *incx, bla_real *beta, bla_real *y, bla_integer *incy) +/* Subroutine */ int PASTEF77(s,spmv)(const bla_character *uplo, const bla_integer *n, const bla_real *alpha, const bla_real *ap, const bla_real *x, const bla_integer *incx, const bla_real *beta, bla_real *y, const bla_integer *incy) { /* System generated locals */ bla_integer i__1, i__2; @@ -351,9 +351,9 @@ bla_integer info; bla_real temp1, temp2; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, iy, jx, jy, kx, ky; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. 
*/ diff --git a/frame/compat/f2c/bla_spmv.h b/frame/compat/f2c/bla_spmv.h index d58349345..8da683bb9 100644 --- a/frame/compat/f2c/bla_spmv.h +++ b/frame/compat/f2c/bla_spmv.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -int PASTEF77(d,spmv)(bla_character *uplo, bla_integer *n, bla_double *alpha, bla_double *ap, bla_double *x, bla_integer *incx, bla_double *beta, bla_double *y, bla_integer *incy); -int PASTEF77(s,spmv)(bla_character *uplo, bla_integer *n, bla_real *alpha, bla_real *ap, bla_real *x, bla_integer *incx, bla_real *beta, bla_real *y, bla_integer *incy); +int PASTEF77(d,spmv)(const bla_character *uplo, const bla_integer *n, const bla_double *alpha, const bla_double *ap, const bla_double *x, const bla_integer *incx, const bla_double *beta, bla_double *y, const bla_integer *incy); +int PASTEF77(s,spmv)(const bla_character *uplo, const bla_integer *n, const bla_real *alpha, const bla_real *ap, const bla_real *x, const bla_integer *incx, const bla_real *beta, bla_real *y, const bla_integer *incy); #endif diff --git a/frame/compat/f2c/bla_spr.c b/frame/compat/f2c/bla_spr.c index 9b4be0d91..8fb0b4d50 100644 --- a/frame/compat/f2c/bla_spr.c +++ b/frame/compat/f2c/bla_spr.c @@ -41,7 +41,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(d,spr)(bla_character *uplo, bla_integer *n, bla_double *alpha, bla_double *x, bla_integer *incx, bla_double *ap) +/* Subroutine */ int PASTEF77(d,spr)(const bla_character *uplo, const bla_integer *n, const bla_double *alpha, const bla_double *x, const bla_integer *incx, bla_double *ap) { /* System generated locals */ bla_integer i__1, i__2; @@ -50,9 +50,9 @@ bla_integer info; bla_double temp; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + 
//extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. */ @@ -268,7 +268,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(s,spr)(bla_character *uplo, bla_integer *n, bla_real *alpha, bla_real *x, bla_integer *incx, bla_real *ap) +/* Subroutine */ int PASTEF77(s,spr)(const bla_character *uplo, const bla_integer *n, const bla_real *alpha, const bla_real *x, const bla_integer *incx, bla_real *ap) { /* System generated locals */ bla_integer i__1, i__2; @@ -277,9 +277,9 @@ bla_integer info; bla_real temp; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. 
*/ diff --git a/frame/compat/f2c/bla_spr.h b/frame/compat/f2c/bla_spr.h index 2e9d4523a..68593758a 100644 --- a/frame/compat/f2c/bla_spr.h +++ b/frame/compat/f2c/bla_spr.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -int PASTEF77(d,spr)(bla_character *uplo, bla_integer *n, bla_double *alpha, bla_double *x, bla_integer *incx, bla_double *ap); -int PASTEF77(s,spr)(bla_character *uplo, bla_integer *n, bla_real *alpha, bla_real *x, bla_integer *incx, bla_real *ap); +int PASTEF77(d,spr)(const bla_character *uplo, const bla_integer *n, const bla_double *alpha, const bla_double *x, const bla_integer *incx, bla_double *ap); +int PASTEF77(s,spr)(const bla_character *uplo, const bla_integer *n, const bla_real *alpha, const bla_real *x, const bla_integer *incx, bla_real *ap); #endif diff --git a/frame/compat/f2c/bla_spr2.c b/frame/compat/f2c/bla_spr2.c index 3ec64f672..bb4f50ec8 100644 --- a/frame/compat/f2c/bla_spr2.c +++ b/frame/compat/f2c/bla_spr2.c @@ -41,7 +41,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(d,spr2)(bla_character *uplo, bla_integer *n, bla_double *alpha, bla_double *x, bla_integer *incx, bla_double *y, bla_integer *incy, bla_double *ap) +/* Subroutine */ int PASTEF77(d,spr2)(const bla_character *uplo, const bla_integer *n, const bla_double *alpha, const bla_double *x, const bla_integer *incx, const bla_double *y, const bla_integer *incy, bla_double *ap) { /* System generated locals */ bla_integer i__1, i__2; @@ -50,9 +50,9 @@ bla_integer info; bla_double temp1, temp2; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, iy, jx = 0, jy = 0, kx = 0, ky = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. 
Array Arguments .. */ @@ -300,7 +300,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(s,spr2)(bla_character *uplo, bla_integer *n, bla_real *alpha, bla_real *x, bla_integer *incx, bla_real *y, bla_integer *incy, bla_real *ap) +/* Subroutine */ int PASTEF77(s,spr2)(const bla_character *uplo, const bla_integer *n, const bla_real *alpha, const bla_real *x, const bla_integer *incx, const bla_real *y, const bla_integer *incy, bla_real *ap) { /* System generated locals */ bla_integer i__1, i__2; @@ -309,9 +309,9 @@ bla_integer info; bla_real temp1, temp2; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, iy, jx = 0, jy = 0, kx = 0, ky = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); /* .. Scalar Arguments .. */ /* .. Array Arguments .. 
*/ diff --git a/frame/compat/f2c/bla_spr2.h b/frame/compat/f2c/bla_spr2.h index 50f18d928..79516c128 100644 --- a/frame/compat/f2c/bla_spr2.h +++ b/frame/compat/f2c/bla_spr2.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -int PASTEF77(d,spr2)(bla_character *uplo, bla_integer *n, bla_double *alpha, bla_double *x, bla_integer *incx, bla_double *y, bla_integer *incy, bla_double *ap); -int PASTEF77(s,spr2)(bla_character *uplo, bla_integer *n, bla_real *alpha, bla_real *x, bla_integer *incx, bla_real *y, bla_integer *incy, bla_real *ap); +int PASTEF77(d,spr2)(const bla_character *uplo, const bla_integer *n, const bla_double *alpha, const bla_double *x, const bla_integer *incx, const bla_double *y, const bla_integer *incy, bla_double *ap); +int PASTEF77(s,spr2)(const bla_character *uplo, const bla_integer *n, const bla_real *alpha, const bla_real *x, const bla_integer *incx, const bla_real *y, const bla_integer *incy, bla_real *ap); #endif diff --git a/frame/compat/f2c/bla_tbmv.c b/frame/compat/f2c/bla_tbmv.c index 82d963348..563f50ba3 100644 --- a/frame/compat/f2c/bla_tbmv.c +++ b/frame/compat/f2c/bla_tbmv.c @@ -41,22 +41,22 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(c,tbmv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_integer *k, bla_scomplex *a, bla_integer *lda, bla_scomplex *x, bla_integer *incx) +/* Subroutine */ int PASTEF77(c,tbmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_scomplex *a, const bla_integer *lda, bla_scomplex *x, const bla_integer *incx) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; bla_scomplex q__1, q__2, q__3; /* Builtin functions */ - void bla_r_cnjg(bla_scomplex *, bla_scomplex *); + //void bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_integer info; bla_scomplex temp; bla_integer i__, j, l; - extern bla_logical 
PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj, nounit; /* .. Scalar Arguments .. */ @@ -611,7 +611,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(d,tbmv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_integer *k, bla_double *a, bla_integer *lda, bla_double *x, bla_integer *incx) +/* Subroutine */ int PASTEF77(d,tbmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_double *a, const bla_integer *lda, bla_double *x, const bla_integer *incx) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4; @@ -620,9 +620,9 @@ bla_integer info; bla_double temp; bla_integer i__, j, l; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical nounit; /* .. Scalar Arguments .. 
*/ @@ -1022,7 +1022,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(s,tbmv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_integer *k, bla_real *a, bla_integer *lda, bla_real *x, bla_integer *incx) +/* Subroutine */ int PASTEF77(s,tbmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_real *a, const bla_integer *lda, bla_real *x, const bla_integer *incx) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4; @@ -1031,9 +1031,9 @@ bla_integer info; bla_real temp; bla_integer i__, j, l; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical nounit; /* .. Scalar Arguments .. 
*/ @@ -1433,22 +1433,22 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(z,tbmv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_integer *k, bla_dcomplex *a, bla_integer *lda, bla_dcomplex *x, bla_integer *incx) +/* Subroutine */ int PASTEF77(z,tbmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_dcomplex *a, const bla_integer *lda, bla_dcomplex *x, const bla_integer *incx) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; bla_dcomplex z__1, z__2, z__3; /* Builtin functions */ - void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); + //void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp; bla_integer i__, j, l; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj, nounit; /* .. Scalar Arguments .. 
*/ diff --git a/frame/compat/f2c/bla_tbmv.h b/frame/compat/f2c/bla_tbmv.h index e73cdaa3b..f09c4ac2a 100644 --- a/frame/compat/f2c/bla_tbmv.h +++ b/frame/compat/f2c/bla_tbmv.h @@ -34,9 +34,9 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -int PASTEF77(c,tbmv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_integer *k, bla_scomplex *a, bla_integer *lda, bla_scomplex *x, bla_integer *incx); -int PASTEF77(d,tbmv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_integer *k, bla_double *a, bla_integer *lda, bla_double *x, bla_integer *incx); -int PASTEF77(s,tbmv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_integer *k, bla_real *a, bla_integer *lda, bla_real *x, bla_integer *incx); -int PASTEF77(z,tbmv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_integer *k, bla_dcomplex *a, bla_integer *lda, bla_dcomplex *x, bla_integer *incx); +int PASTEF77(c,tbmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_scomplex *a, const bla_integer *lda, bla_scomplex *x, const bla_integer *incx); +int PASTEF77(d,tbmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_double *a, const bla_integer *lda, bla_double *x, const bla_integer *incx); +int PASTEF77(s,tbmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_real *a, const bla_integer *lda, bla_real *x, const bla_integer *incx); +int PASTEF77(z,tbmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_dcomplex *a, const bla_integer *lda, bla_dcomplex *x, const bla_integer *incx); #endif diff --git a/frame/compat/f2c/bla_tbsv.c b/frame/compat/f2c/bla_tbsv.c index 
848c19aaf..f4fda885c 100644 --- a/frame/compat/f2c/bla_tbsv.c +++ b/frame/compat/f2c/bla_tbsv.c @@ -41,22 +41,22 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(c,tbsv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_integer *k, bla_scomplex *a, bla_integer *lda, bla_scomplex *x, bla_integer *incx) +/* Subroutine */ int PASTEF77(c,tbsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_scomplex *a, const bla_integer *lda, bla_scomplex *x, const bla_integer *incx) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; bla_scomplex q__1, q__2, q__3; /* Builtin functions */ - void bla_c_div(bla_scomplex *, bla_scomplex *, bla_scomplex *), bla_r_cnjg(bla_scomplex *, bla_scomplex *); + //void bla_c_div(bla_scomplex *, bla_scomplex *, bla_scomplex *), bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_integer info; bla_scomplex temp; bla_integer i__, j, l; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj, nounit; /* .. Scalar Arguments .. 
*/ @@ -603,7 +603,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(d,tbsv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_integer *k, bla_double *a, bla_integer *lda, bla_double *x, bla_integer *incx) +/* Subroutine */ int PASTEF77(d,tbsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_double *a, const bla_integer *lda, bla_double *x, const bla_integer *incx) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4; @@ -612,9 +612,9 @@ bla_integer info; bla_double temp; bla_integer i__, j, l; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical nounit; /* .. Scalar Arguments .. 
*/ @@ -1018,7 +1018,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(s,tbsv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_integer *k, bla_real *a, bla_integer *lda, bla_real *x, bla_integer *incx) +/* Subroutine */ int PASTEF77(s,tbsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_real *a, const bla_integer *lda, bla_real *x, const bla_integer *incx) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4; @@ -1027,9 +1027,9 @@ bla_integer info; bla_real temp; bla_integer i__, j, l; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical nounit; /* .. Scalar Arguments .. 
*/ @@ -1433,23 +1433,23 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(z,tbsv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_integer *k, bla_dcomplex *a, bla_integer *lda, bla_dcomplex *x, bla_integer *incx) +/* Subroutine */ int PASTEF77(z,tbsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_dcomplex *a, const bla_integer *lda, bla_dcomplex *x, const bla_integer *incx) { /* System generated locals */ bla_integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5; bla_dcomplex z__1, z__2, z__3; /* Builtin functions */ - void bla_z_div(bla_dcomplex *, bla_dcomplex *, bla_dcomplex *), bla_d_cnjg( - bla_dcomplex *, bla_dcomplex *); + //void bla_z_div(bla_dcomplex *, bla_dcomplex *, bla_dcomplex *), bla_d_cnjg( + // bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp; bla_integer i__, j, l; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kplus1, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj, nounit; /* .. Scalar Arguments .. 
*/ diff --git a/frame/compat/f2c/bla_tbsv.h b/frame/compat/f2c/bla_tbsv.h index 8fc6b2772..2c4a31f7a 100644 --- a/frame/compat/f2c/bla_tbsv.h +++ b/frame/compat/f2c/bla_tbsv.h @@ -34,9 +34,9 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -int PASTEF77(c,tbsv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_integer *k, bla_scomplex *a, bla_integer *lda, bla_scomplex *x, bla_integer *incx); -int PASTEF77(d,tbsv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_integer *k, bla_double *a, bla_integer *lda, bla_double *x, bla_integer *incx); -int PASTEF77(s,tbsv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_integer *k, bla_real *a, bla_integer *lda, bla_real *x, bla_integer *incx); -int PASTEF77(z,tbsv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_integer *k, bla_dcomplex *a, bla_integer *lda, bla_dcomplex *x, bla_integer *incx); +int PASTEF77(c,tbsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_scomplex *a, const bla_integer *lda, bla_scomplex *x, const bla_integer *incx); +int PASTEF77(d,tbsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_double *a, const bla_integer *lda, bla_double *x, const bla_integer *incx); +int PASTEF77(s,tbsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_real *a, const bla_integer *lda, bla_real *x, const bla_integer *incx); +int PASTEF77(z,tbsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_integer *k, const bla_dcomplex *a, const bla_integer *lda, bla_dcomplex *x, const bla_integer *incx); #endif diff --git a/frame/compat/f2c/bla_tpmv.c b/frame/compat/f2c/bla_tpmv.c index 
5cafe7941..a13be84fb 100644 --- a/frame/compat/f2c/bla_tpmv.c +++ b/frame/compat/f2c/bla_tpmv.c @@ -41,22 +41,22 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(c,tpmv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_scomplex *ap, bla_scomplex *x, bla_integer *incx) +/* Subroutine */ int PASTEF77(c,tpmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_scomplex *ap, bla_scomplex *x, const bla_integer *incx) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5; bla_scomplex q__1, q__2, q__3; /* Builtin functions */ - void bla_r_cnjg(bla_scomplex *, bla_scomplex *); + //void bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_integer info; bla_scomplex temp; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj, nounit; /* .. Scalar Arguments .. 
*/ @@ -542,7 +542,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(d,tpmv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_double *ap, bla_double *x, bla_integer *incx) +/* Subroutine */ int PASTEF77(d,tpmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_double *ap, bla_double *x, const bla_integer *incx) { /* System generated locals */ bla_integer i__1, i__2; @@ -551,9 +551,9 @@ bla_integer info; bla_double temp; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical nounit; /* .. Scalar Arguments .. */ @@ -890,7 +890,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(s,tpmv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_real *ap, bla_real *x, bla_integer *incx) +/* Subroutine */ int PASTEF77(s,tpmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_real *ap, bla_real *x, const bla_integer *incx) { /* System generated locals */ bla_integer i__1, i__2; @@ -899,9 +899,9 @@ bla_integer info; bla_real temp; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical nounit; /* .. Scalar Arguments .. 
*/ @@ -1238,22 +1238,22 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(z,tpmv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_dcomplex *ap, bla_dcomplex *x, bla_integer *incx) +/* Subroutine */ int PASTEF77(z,tpmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_dcomplex *ap, bla_dcomplex *x, const bla_integer *incx) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5; bla_dcomplex z__1, z__2, z__3; /* Builtin functions */ - void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); + //void bla_d_cnjg(bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj, nounit; /* .. Scalar Arguments .. 
*/ diff --git a/frame/compat/f2c/bla_tpmv.h b/frame/compat/f2c/bla_tpmv.h index 68c841dd0..75be91681 100644 --- a/frame/compat/f2c/bla_tpmv.h +++ b/frame/compat/f2c/bla_tpmv.h @@ -34,9 +34,9 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -int PASTEF77(c,tpmv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_scomplex *ap, bla_scomplex *x, bla_integer *incx); -int PASTEF77(d,tpmv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_double *ap, bla_double *x, bla_integer *incx); -int PASTEF77(s,tpmv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_real *ap, bla_real *x, bla_integer *incx); -int PASTEF77(z,tpmv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_dcomplex *ap, bla_dcomplex *x, bla_integer *incx); +int PASTEF77(c,tpmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_scomplex *ap, bla_scomplex *x, const bla_integer *incx); +int PASTEF77(d,tpmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_double *ap, bla_double *x, const bla_integer *incx); +int PASTEF77(s,tpmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_real *ap, bla_real *x, const bla_integer *incx); +int PASTEF77(z,tpmv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_dcomplex *ap, bla_dcomplex *x, const bla_integer *incx); #endif diff --git a/frame/compat/f2c/bla_tpsv.c b/frame/compat/f2c/bla_tpsv.c index 2ff23575d..cad8d12c8 100644 --- a/frame/compat/f2c/bla_tpsv.c +++ b/frame/compat/f2c/bla_tpsv.c @@ -41,22 +41,22 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(c,tpsv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_scomplex *ap, bla_scomplex *x, bla_integer 
*incx) +/* Subroutine */ int PASTEF77(c,tpsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_scomplex *ap, bla_scomplex *x, const bla_integer *incx) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5; bla_scomplex q__1, q__2, q__3; /* Builtin functions */ - void bla_c_div(bla_scomplex *, bla_scomplex *, bla_scomplex *), bla_r_cnjg(bla_scomplex *, bla_scomplex *); + //void bla_c_div(bla_scomplex *, bla_scomplex *, bla_scomplex *), bla_r_cnjg(bla_scomplex *, bla_scomplex *); /* Local variables */ bla_integer info; bla_scomplex temp; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj, nounit; /* .. Scalar Arguments .. 
*/ @@ -534,7 +534,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(d,tpsv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_double *ap, bla_double *x, bla_integer *incx) +/* Subroutine */ int PASTEF77(d,tpsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_double *ap, bla_double *x, const bla_integer *incx) { /* System generated locals */ bla_integer i__1, i__2; @@ -543,9 +543,9 @@ bla_integer info; bla_double temp; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical nounit; /* .. Scalar Arguments .. */ @@ -885,7 +885,7 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(s,tpsv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_real *ap, bla_real *x, bla_integer *incx) +/* Subroutine */ int PASTEF77(s,tpsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_real *ap, bla_real *x, const bla_integer *incx) { /* System generated locals */ bla_integer i__1, i__2; @@ -894,9 +894,9 @@ bla_integer info; bla_real temp; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical nounit; /* .. Scalar Arguments .. 
*/ @@ -1236,23 +1236,23 @@ -lf2c -lm (in that order) */ -/* Subroutine */ int PASTEF77(z,tpsv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_dcomplex *ap, bla_dcomplex *x, bla_integer *incx) +/* Subroutine */ int PASTEF77(z,tpsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_dcomplex *ap, bla_dcomplex *x, const bla_integer *incx) { /* System generated locals */ bla_integer i__1, i__2, i__3, i__4, i__5; bla_dcomplex z__1, z__2, z__3; /* Builtin functions */ - void bla_z_div(bla_dcomplex *, bla_dcomplex *, bla_dcomplex *), bla_d_cnjg( - bla_dcomplex *, bla_dcomplex *); + //void bla_z_div(bla_dcomplex *, bla_dcomplex *, bla_dcomplex *), bla_d_cnjg( + // bla_dcomplex *, bla_dcomplex *); /* Local variables */ bla_integer info; bla_dcomplex temp; bla_integer i__, j, k; - extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); + //extern bla_logical PASTEF770(lsame)(bla_character *, bla_character *, ftnlen, ftnlen); bla_integer kk, ix, jx, kx = 0; - extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); + //extern /* Subroutine */ int PASTEF770(xerbla)(bla_character *, bla_integer *, ftnlen); bla_logical noconj, nounit; /* .. Scalar Arguments .. 
*/ diff --git a/frame/compat/f2c/bla_tpsv.h b/frame/compat/f2c/bla_tpsv.h index 1dcbeebfe..fa94399ed 100644 --- a/frame/compat/f2c/bla_tpsv.h +++ b/frame/compat/f2c/bla_tpsv.h @@ -34,9 +34,9 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -int PASTEF77(c,tpsv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_scomplex *ap, bla_scomplex *x, bla_integer *incx); -int PASTEF77(d,tpsv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_double *ap, bla_double *x, bla_integer *incx); -int PASTEF77(s,tpsv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_real *ap, bla_real *x, bla_integer *incx); -int PASTEF77(z,tpsv)(bla_character *uplo, bla_character *trans, bla_character *diag, bla_integer *n, bla_dcomplex *ap, bla_dcomplex *x, bla_integer *incx); +int PASTEF77(c,tpsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_scomplex *ap, bla_scomplex *x, const bla_integer *incx); +int PASTEF77(d,tpsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_double *ap, bla_double *x, const bla_integer *incx); +int PASTEF77(s,tpsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_real *ap, bla_real *x, const bla_integer *incx); +int PASTEF77(z,tpsv)(const bla_character *uplo, const bla_character *trans, const bla_character *diag, const bla_integer *n, const bla_dcomplex *ap, bla_dcomplex *x, const bla_integer *incx); #endif diff --git a/frame/compat/f2c/bla_xerbla.c b/frame/compat/f2c/bla_xerbla.c index 20a14702c..3968e2f44 100644 --- a/frame/compat/f2c/bla_xerbla.c +++ b/frame/compat/f2c/bla_xerbla.c @@ -43,7 +43,7 @@ /* Table of constant values */ -/* Subroutine */ int PASTEF770(xerbla)(bla_character *srname, bla_integer *info, ftnlen srname_len) +/* Subroutine */ int PASTEF770(xerbla)(const bla_character 
*srname, const bla_integer *info, ftnlen srname_len) { /* -- LAPACK auxiliary routine (preliminary version) -- */ /* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ @@ -72,10 +72,10 @@ /* INFO (input) INTEGER */ /* The position of the invalid parameter in the parameter list */ /* of the calling routine. */ - int i; + //int i; - for ( i = 0; i < srname_len; ++i ) - srname[i] = toupper( srname[i] ); + //for ( i = 0; i < srname_len; ++i ) + // srname[i] = toupper( srname[i] ); printf("** On entry to %6s, parameter number %2i had an illegal value\n", srname, (int)*info); diff --git a/frame/compat/f2c/bla_xerbla.h b/frame/compat/f2c/bla_xerbla.h index 597f467b8..7d1d814a4 100644 --- a/frame/compat/f2c/bla_xerbla.h +++ b/frame/compat/f2c/bla_xerbla.h @@ -34,6 +34,6 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -int PASTEF770(xerbla)(bla_character *srname, bla_integer *info, ftnlen srname_len); +int PASTEF770(xerbla)(const bla_character *srname, const bla_integer *info, ftnlen srname_len); #endif diff --git a/frame/compat/f2c/util/bla_c_abs.c b/frame/compat/f2c/util/bla_c_abs.c index 0e941f358..03b245384 100644 --- a/frame/compat/f2c/util/bla_c_abs.c +++ b/frame/compat/f2c/util/bla_c_abs.c @@ -36,7 +36,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -double bla_c_abs(bla_scomplex *z) +double bla_c_abs(const bla_scomplex *z) { return( bla_f__cabs( bli_creal( *z ), bli_cimag( *z ) ) ); diff --git a/frame/compat/f2c/util/bla_c_abs.h b/frame/compat/f2c/util/bla_c_abs.h index 5531e2cea..3d3a01ddc 100644 --- a/frame/compat/f2c/util/bla_c_abs.h +++ b/frame/compat/f2c/util/bla_c_abs.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -double bla_c_abs(bla_scomplex *z); +double bla_c_abs(const bla_scomplex *z); #endif diff --git a/frame/compat/f2c/util/bla_c_div.c b/frame/compat/f2c/util/bla_c_div.c index d358218c2..cf1412646 100644 --- a/frame/compat/f2c/util/bla_c_div.c +++ b/frame/compat/f2c/util/bla_c_div.c @@ -36,7 +36,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -void bla_c_div(bla_scomplex *cp, 
bla_scomplex *ap, bla_scomplex *bp) +void bla_c_div(bla_scomplex *cp, const bla_scomplex *ap, const bla_scomplex *bp) { bli_ccopys( *ap, *cp ); bli_cinvscals( *bp, *cp ); diff --git a/frame/compat/f2c/util/bla_c_div.h b/frame/compat/f2c/util/bla_c_div.h index fdfdb177a..9b2103a5c 100644 --- a/frame/compat/f2c/util/bla_c_div.h +++ b/frame/compat/f2c/util/bla_c_div.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -void bla_c_div(bla_scomplex *cp, bla_scomplex *ap, bla_scomplex *bp); +void bla_c_div(bla_scomplex *cp, const bla_scomplex *ap, const bla_scomplex *bp); #endif diff --git a/frame/compat/f2c/util/bla_d_abs.c b/frame/compat/f2c/util/bla_d_abs.c index 90519cce9..741eca66e 100644 --- a/frame/compat/f2c/util/bla_d_abs.c +++ b/frame/compat/f2c/util/bla_d_abs.c @@ -36,7 +36,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -double bla_d_abs(bla_double *x) +double bla_d_abs(const bla_double *x) { if(*x >= 0.0) return(*x); diff --git a/frame/compat/f2c/util/bla_d_abs.h b/frame/compat/f2c/util/bla_d_abs.h index 6166f8c83..fd634cb66 100644 --- a/frame/compat/f2c/util/bla_d_abs.h +++ b/frame/compat/f2c/util/bla_d_abs.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -double bla_d_abs(bla_double *x); +double bla_d_abs(const bla_double *x); #endif diff --git a/frame/compat/f2c/util/bla_d_cnjg.c b/frame/compat/f2c/util/bla_d_cnjg.c index 05b76f963..e72523d60 100644 --- a/frame/compat/f2c/util/bla_d_cnjg.c +++ b/frame/compat/f2c/util/bla_d_cnjg.c @@ -36,7 +36,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -void bla_d_cnjg(bla_dcomplex *dest, bla_dcomplex *src) +void bla_d_cnjg(bla_dcomplex *dest, const bla_dcomplex *src) { bli_zcopyjs( *src, *dest ); } diff --git a/frame/compat/f2c/util/bla_d_cnjg.h b/frame/compat/f2c/util/bla_d_cnjg.h index 1e8b9129b..a318c0737 100644 --- a/frame/compat/f2c/util/bla_d_cnjg.h +++ b/frame/compat/f2c/util/bla_d_cnjg.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -void bla_d_cnjg(bla_dcomplex *dest, bla_dcomplex *src); +void bla_d_cnjg(bla_dcomplex *dest, const 
bla_dcomplex *src); #endif diff --git a/frame/compat/f2c/util/bla_d_imag.c b/frame/compat/f2c/util/bla_d_imag.c index 2a5c5dba3..350124399 100644 --- a/frame/compat/f2c/util/bla_d_imag.c +++ b/frame/compat/f2c/util/bla_d_imag.c @@ -36,7 +36,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -double bla_d_imag(bla_dcomplex *z) +double bla_d_imag(const bla_dcomplex *z) { return bli_zimag( *z ); } diff --git a/frame/compat/f2c/util/bla_d_imag.h b/frame/compat/f2c/util/bla_d_imag.h index 5e2fa0c14..180e344f3 100644 --- a/frame/compat/f2c/util/bla_d_imag.h +++ b/frame/compat/f2c/util/bla_d_imag.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -double bla_d_imag(bla_dcomplex *z); +double bla_d_imag(const bla_dcomplex *z); #endif diff --git a/frame/compat/f2c/util/bla_d_sign.c b/frame/compat/f2c/util/bla_d_sign.c index f7b19b89f..926db5508 100644 --- a/frame/compat/f2c/util/bla_d_sign.c +++ b/frame/compat/f2c/util/bla_d_sign.c @@ -36,7 +36,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -double bla_d_sign(bla_double *a, bla_double *b) +double bla_d_sign(const bla_double *a, const bla_double *b) { double x = (*a >= 0.0 ? 
*a : - *a); diff --git a/frame/compat/f2c/util/bla_d_sign.h b/frame/compat/f2c/util/bla_d_sign.h index 74f4b015f..f2054a71b 100644 --- a/frame/compat/f2c/util/bla_d_sign.h +++ b/frame/compat/f2c/util/bla_d_sign.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -double bla_d_sign(bla_double *a, bla_double *b); +double bla_d_sign(const bla_double *a, const bla_double *b); #endif diff --git a/frame/compat/f2c/util/bla_f__cabs.h b/frame/compat/f2c/util/bla_f__cabs.h index 56c7faec2..6864b6ef4 100644 --- a/frame/compat/f2c/util/bla_f__cabs.h +++ b/frame/compat/f2c/util/bla_f__cabs.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -double bla_f__cabs(double bla_real, double imag); +double bla_f__cabs(double real, double imag); #endif diff --git a/frame/compat/f2c/util/bla_r_abs.c b/frame/compat/f2c/util/bla_r_abs.c index d65b83096..bb008ffa8 100644 --- a/frame/compat/f2c/util/bla_r_abs.c +++ b/frame/compat/f2c/util/bla_r_abs.c @@ -36,7 +36,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -double bla_r_abs(bla_real *x) +double bla_r_abs(const bla_real *x) { if(*x >= 0.0) return(*x); diff --git a/frame/compat/f2c/util/bla_r_abs.h b/frame/compat/f2c/util/bla_r_abs.h index 5f9f7416c..b985f15fa 100644 --- a/frame/compat/f2c/util/bla_r_abs.h +++ b/frame/compat/f2c/util/bla_r_abs.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -double bla_r_abs(bla_real *x); +double bla_r_abs(const bla_real *x); #endif diff --git a/frame/compat/f2c/util/bla_r_cnjg.c b/frame/compat/f2c/util/bla_r_cnjg.c index ebfb136e4..a3c2320d3 100644 --- a/frame/compat/f2c/util/bla_r_cnjg.c +++ b/frame/compat/f2c/util/bla_r_cnjg.c @@ -36,7 +36,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -void bla_r_cnjg(bla_scomplex *dest, bla_scomplex *src) +void bla_r_cnjg(bla_scomplex *dest, const bla_scomplex *src) { bli_ccopyjs( *src, *dest ); } diff --git a/frame/compat/f2c/util/bla_r_cnjg.h b/frame/compat/f2c/util/bla_r_cnjg.h index 9da9c8f87..a4c106d19 100644 --- a/frame/compat/f2c/util/bla_r_cnjg.h +++ b/frame/compat/f2c/util/bla_r_cnjg.h @@ 
-34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -void bla_r_cnjg(bla_scomplex *dest, bla_scomplex *src); +void bla_r_cnjg(bla_scomplex *dest, const bla_scomplex *src); #endif diff --git a/frame/compat/f2c/util/bla_r_imag.c b/frame/compat/f2c/util/bla_r_imag.c index 754d23287..f48888895 100644 --- a/frame/compat/f2c/util/bla_r_imag.c +++ b/frame/compat/f2c/util/bla_r_imag.c @@ -36,7 +36,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -bla_real bla_r_imag(bla_scomplex *z) +bla_real bla_r_imag(const bla_scomplex *z) { return bli_cimag( *z ); } diff --git a/frame/compat/f2c/util/bla_r_imag.h b/frame/compat/f2c/util/bla_r_imag.h index ecb5d0178..16903fe32 100644 --- a/frame/compat/f2c/util/bla_r_imag.h +++ b/frame/compat/f2c/util/bla_r_imag.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -bla_real bla_r_imag(bla_scomplex *z); +bla_real bla_r_imag(const bla_scomplex *z); #endif diff --git a/frame/compat/f2c/util/bla_r_sign.c b/frame/compat/f2c/util/bla_r_sign.c index 29f4ae06d..777413495 100644 --- a/frame/compat/f2c/util/bla_r_sign.c +++ b/frame/compat/f2c/util/bla_r_sign.c @@ -36,7 +36,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -double bla_r_sign(bla_real *a, bla_real *b) +double bla_r_sign(const bla_real *a, const bla_real *b) { double x = (*a >= 0.0 ? 
*a : - *a); diff --git a/frame/compat/f2c/util/bla_r_sign.h b/frame/compat/f2c/util/bla_r_sign.h index 595ad4b4d..6ade10790 100644 --- a/frame/compat/f2c/util/bla_r_sign.h +++ b/frame/compat/f2c/util/bla_r_sign.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -double bla_r_sign(bla_real *a, bla_real *b); +double bla_r_sign(const bla_real *a, const bla_real *b); #endif diff --git a/frame/compat/f2c/util/bla_z_abs.c b/frame/compat/f2c/util/bla_z_abs.c index 0ad781aee..378aa9af0 100644 --- a/frame/compat/f2c/util/bla_z_abs.c +++ b/frame/compat/f2c/util/bla_z_abs.c @@ -36,7 +36,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -double bla_z_abs(bla_dcomplex *z) +double bla_z_abs(const bla_dcomplex *z) { return( bla_f__cabs( bli_zreal( *z ), bli_zimag( *z ) ) ); diff --git a/frame/compat/f2c/util/bla_z_abs.h b/frame/compat/f2c/util/bla_z_abs.h index ba6236f40..9e29975cd 100644 --- a/frame/compat/f2c/util/bla_z_abs.h +++ b/frame/compat/f2c/util/bla_z_abs.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -double bla_z_abs(bla_dcomplex *z); +double bla_z_abs(const bla_dcomplex *z); #endif diff --git a/frame/compat/f2c/util/bla_z_div.c b/frame/compat/f2c/util/bla_z_div.c index b32ef3524..6a16f3d2a 100644 --- a/frame/compat/f2c/util/bla_z_div.c +++ b/frame/compat/f2c/util/bla_z_div.c @@ -36,7 +36,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -void bla_z_div(bla_dcomplex *cp, bla_dcomplex *ap, bla_dcomplex *bp) +void bla_z_div(bla_dcomplex *cp, const bla_dcomplex *ap, const bla_dcomplex *bp) { bli_zcopys( *ap, *cp ); bli_zinvscals( *bp, *cp ); diff --git a/frame/compat/f2c/util/bla_z_div.h b/frame/compat/f2c/util/bla_z_div.h index daca277e3..bfe0d85e0 100644 --- a/frame/compat/f2c/util/bla_z_div.h +++ b/frame/compat/f2c/util/bla_z_div.h @@ -34,7 +34,7 @@ #ifdef BLIS_ENABLE_BLAS2BLIS -void bla_z_div(bla_dcomplex *cp, bla_dcomplex *ap, bla_dcomplex *bp); +void bla_z_div(bla_dcomplex *cp, const bla_dcomplex *ap, const bla_dcomplex *bp); #endif diff --git a/frame/include/blis.h b/frame/include/blis.h index 
9bfedd71a..86a8c0565 100644 --- a/frame/include/blis.h +++ b/frame/include/blis.h @@ -162,15 +162,16 @@ extern "C" { #include "bli_util.h" -// -- CBLAS compatibility layer -- - -#include "bli_cblas.h" - // -- BLAS compatibility layer -- #include "bli_blas.h" +// -- CBLAS compatibility layer -- + +#include "bli_cblas.h" + + // End extern "C" construct block. #ifdef __cplusplus } From 4136553f0d0661a668dfdb9edcd7ce1c5773dde7 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Fri, 22 Apr 2016 11:53:53 -0500 Subject: [PATCH 06/14] Clear level-3 cntx_t's via memset() before use. Details: - In all level-3 operations' _cntx_init() functions, replaced calls to bli_cntx_obj_create() with calls to bli_cntx_obj_clear(), and in all level-3 operations' _cntx_finalize() functions, removed calls to bli_cntx_obj_free(), leaving those function definitions empty. - Changed the definition of bli_cntx_obj_clear() so that the clearing occurs via a single call to memset(). --- frame/3/bli_l3_cntx.c | 12 +++----- frame/base/bli_cntx.c | 51 ++----------------------------- frame/ind/cntx/bli_gemmind_cntx.c | 42 +++++++++---------------- frame/ind/cntx/bli_trsmind_cntx.c | 12 +++----- 4 files changed, 24 insertions(+), 93 deletions(-) diff --git a/frame/3/bli_l3_cntx.c b/frame/3/bli_l3_cntx.c index 5a51dc3d6..634e4c1ab 100644 --- a/frame/3/bli_l3_cntx.c +++ b/frame/3/bli_l3_cntx.c @@ -40,9 +40,8 @@ void bli_gemm_cntx_init( cntx_t* cntx ) { - bli_cntx_obj_create( cntx ); - - //bli_cntx_obj_clear( cntx ); + // Clear the context fields. + bli_cntx_obj_clear( cntx ); // Initialize the context with the current architecture's native // level-3 gemm micro-kernel, and its output preferences. 
@@ -71,16 +70,14 @@ void bli_gemm_cntx_init( cntx_t* cntx ) void bli_gemm_cntx_finalize( cntx_t* cntx ) { - bli_cntx_obj_free( cntx ); } // ----------------------------------------------------------------------------- void bli_trsm_cntx_init( cntx_t* cntx ) { - bli_cntx_obj_create( cntx ); - - //bli_cntx_obj_clear( cntx ); + // Clear the context fields. + bli_cntx_obj_clear( cntx ); // Initialize the context with the current architecture's native // level-3 gemm micro-kernel, and its output preferences. @@ -116,6 +113,5 @@ void bli_trsm_cntx_init( cntx_t* cntx ) void bli_trsm_cntx_finalize( cntx_t* cntx ) { - bli_cntx_obj_free( cntx ); } diff --git a/frame/base/bli_cntx.c b/frame/base/bli_cntx.c index c1043dfbe..a2fedb755 100644 --- a/frame/base/bli_cntx.c +++ b/frame/base/bli_cntx.c @@ -56,55 +56,8 @@ void bli_cntx_obj_free( cntx_t* cntx ) void bli_cntx_obj_clear( cntx_t* cntx ) { - blksz_t* blkszs = bli_cntx_blkszs_buf( cntx ); - bszid_t* bmults = bli_cntx_bmults_buf( cntx ); - func_t* l3_vir_ukrs = bli_cntx_l3_vir_ukrs_buf( cntx ); - func_t* l3_nat_ukrs = bli_cntx_l3_nat_ukrs_buf( cntx ); - mbool_t* l3_nat_ukrs_prefs = bli_cntx_l3_nat_ukrs_prefs_buf( cntx ); - func_t* l1f_kers = bli_cntx_l1f_kers_buf( cntx ); - func_t* l1v_kers = bli_cntx_l1v_kers_buf( cntx ); - func_t* packm_ukrs = bli_cntx_packm_ukrs_buf( cntx ); - - dim_t i; - - // Initialize all of the elements of every array to a sane initial - // value. (Strictly speaking, there is no "null" value for typedef'ed - // enums such as bszid_t, so we cheat a little by using 0.) 
- - func_t null_func = { { NULL, NULL, NULL, NULL } }; - blksz_t null_blksz = { { 0, 0, 0, 0, } }; - mbool_t null_mbool = { { FALSE, FALSE, FALSE, FALSE } }; - bszid_t null_bszid = 0; - - for ( i = 0; i < BLIS_NUM_BLKSZS; ++i ) - { - blkszs[ i ] = null_blksz; - } - for ( i = 0; i < BLIS_NUM_BLKSZS; ++i ) - { - bmults[ i ] = null_bszid; - } - for ( i = 0; i < BLIS_NUM_LEVEL3_UKRS; ++i ) - { - l3_vir_ukrs[ i ] = null_func; - l3_nat_ukrs[ i ] = null_func; - l3_nat_ukrs_prefs[ i ] = null_mbool; - } - for ( i = 0; i < BLIS_NUM_LEVEL1F_KERS; ++i ) - { - l1f_kers[ i ] = null_func; - } - for ( i = 0; i < BLIS_NUM_LEVEL1V_KERS; ++i ) - { - l1v_kers[ i ] = null_func; - } - { - packm_ukrs[ 0 ] = null_func; - } - - // NOTE: It doesn't make sense to initialize method or schema fields - // at this time; the method field would normally be set by _set_blkszs() - // and the schema fields are set by _set_pack_schema_[abc](). + // Fill the entire cntx_t structure with zeros. + memset( ( void* )cntx, 0, sizeof( *cntx ) ); } void bli_cntx_init( cntx_t* cntx ) diff --git a/frame/ind/cntx/bli_gemmind_cntx.c b/frame/ind/cntx/bli_gemmind_cntx.c index b8d777f86..a484cf1a1 100644 --- a/frame/ind/cntx/bli_gemmind_cntx.c +++ b/frame/ind/cntx/bli_gemmind_cntx.c @@ -109,8 +109,8 @@ void bli_gemm3m1_cntx_init( cntx_t* cntx ) { const ind_t method = BLIS_3M1; - // Perform basic setup on the context. - bli_cntx_obj_create( cntx ); + // Clear the context fields. + bli_cntx_obj_clear( cntx ); // Initialize the context with the current architecture's native // level-3 gemm micro-kernel, and its output preferences. @@ -147,8 +147,6 @@ void bli_gemm3m1_cntx_stage( dim_t stage, cntx_t* cntx ) void bli_gemm3m1_cntx_finalize( cntx_t* cntx ) { - // Free the context and all memory allocated to it. 
- bli_cntx_obj_free( cntx ); } // ----------------------------------------------------------------------------- @@ -157,8 +155,8 @@ void bli_gemm3m2_cntx_init( cntx_t* cntx ) { const ind_t method = BLIS_3M2; - // Perform basic setup on the context. - bli_cntx_obj_create( cntx ); + // Clear the context fields. + bli_cntx_obj_clear( cntx ); // Initialize the context with the current architecture's native // level-3 gemm micro-kernel, and its output preferences. @@ -195,8 +193,6 @@ void bli_gemm3m2_cntx_stage( dim_t stage, cntx_t* cntx ) void bli_gemm3m2_cntx_finalize( cntx_t* cntx ) { - // Free the context and all memory allocated to it. - bli_cntx_obj_free( cntx ); } // ----------------------------------------------------------------------------- @@ -205,8 +201,8 @@ void bli_gemm3m3_cntx_init( cntx_t* cntx ) { const ind_t method = BLIS_3M3; - // Perform basic setup on the context. - bli_cntx_obj_create( cntx ); + // Clear the context fields. + bli_cntx_obj_clear( cntx ); // Initialize the context with the current architecture's native // level-3 gemm micro-kernel, and its output preferences. @@ -256,8 +252,6 @@ void bli_gemm3m3_cntx_stage( dim_t stage, cntx_t* cntx ) void bli_gemm3m3_cntx_finalize( cntx_t* cntx ) { - // Free the context and all memory allocated to it. - bli_cntx_obj_free( cntx ); } // ----------------------------------------------------------------------------- @@ -266,8 +260,8 @@ void bli_gemm3mh_cntx_init( cntx_t* cntx ) { const ind_t method = BLIS_3MH; - // Perform basic setup on the context. - bli_cntx_obj_create( cntx ); + // Clear the context fields. + bli_cntx_obj_clear( cntx ); // Initialize the context with the current architecture's native // level-3 gemm micro-kernel, and its output preferences. @@ -320,8 +314,6 @@ void bli_gemm3mh_cntx_stage( dim_t stage, cntx_t* cntx ) void bli_gemm3mh_cntx_finalize( cntx_t* cntx ) { - // Free the context and all memory allocated to it. 
- bli_cntx_obj_free( cntx ); } // ----------------------------------------------------------------------------- @@ -330,8 +322,8 @@ void bli_gemm4m1_cntx_init( cntx_t* cntx ) { const ind_t method = BLIS_4M1A; - // Perform basic setup on the context. - bli_cntx_obj_create( cntx ); + // Clear the context fields. + bli_cntx_obj_clear( cntx ); // Initialize the context with the current architecture's native // level-3 gemm micro-kernel, and its output preferences. @@ -368,8 +360,6 @@ void bli_gemm4m1_cntx_stage( dim_t stage, cntx_t* cntx ) void bli_gemm4m1_cntx_finalize( cntx_t* cntx ) { - // Free the context and all memory allocated to it. - bli_cntx_obj_free( cntx ); } // ----------------------------------------------------------------------------- @@ -378,8 +368,8 @@ void bli_gemm4mb_cntx_init( cntx_t* cntx ) { const ind_t method = BLIS_4M1B; - // Perform basic setup on the context. - bli_cntx_obj_create( cntx ); + // Clear the context fields. + bli_cntx_obj_clear( cntx ); // Initialize the context with the current architecture's native // level-3 gemm micro-kernel, and its output preferences. @@ -416,8 +406,6 @@ void bli_gemm4mb_cntx_stage( dim_t stage, cntx_t* cntx ) void bli_gemm4mb_cntx_finalize( cntx_t* cntx ) { - // Free the context and all memory allocated to it. - bli_cntx_obj_free( cntx ); } // ----------------------------------------------------------------------------- @@ -426,8 +414,8 @@ void bli_gemm4mh_cntx_init( cntx_t* cntx ) { const ind_t method = BLIS_4MH; - // Perform basic setup on the context. - bli_cntx_obj_create( cntx ); + // Clear the context fields. + bli_cntx_obj_clear( cntx ); // Initialize the context with the current architecture's native // level-3 gemm micro-kernel, and its output preferences. @@ -485,8 +473,6 @@ void bli_gemm4mh_cntx_stage( dim_t stage, cntx_t* cntx ) void bli_gemm4mh_cntx_finalize( cntx_t* cntx ) { - // Free the context and all memory allocated to it. 
- bli_cntx_obj_free( cntx ); } // ----------------------------------------------------------------------------- diff --git a/frame/ind/cntx/bli_trsmind_cntx.c b/frame/ind/cntx/bli_trsmind_cntx.c index c1e8057ce..85212ba90 100644 --- a/frame/ind/cntx/bli_trsmind_cntx.c +++ b/frame/ind/cntx/bli_trsmind_cntx.c @@ -40,8 +40,8 @@ void bli_trsm3m1_cntx_init( cntx_t* cntx ) { const ind_t method = BLIS_3M1; - // Perform basic setup on the context. - bli_cntx_obj_create( cntx ); + // Clear the context fields. + bli_cntx_obj_clear( cntx ); // Initialize the context with the current architecture's native // level-3 gemm micro-kernel, and its output preferences. @@ -78,8 +78,6 @@ void bli_trsm3m1_cntx_init( cntx_t* cntx ) void bli_trsm3m1_cntx_finalize( cntx_t* cntx ) { - // Free the context and all memory allocated to it. - bli_cntx_obj_free( cntx ); } // ----------------------------------------------------------------------------- @@ -88,8 +86,8 @@ void bli_trsm4m1_cntx_init( cntx_t* cntx ) { const ind_t method = BLIS_4M1A; - // Perform basic setup on the context. - bli_cntx_obj_create( cntx ); + // Clear the context fields. + bli_cntx_obj_clear( cntx ); // Initialize the context with the current architecture's native // level-3 gemm micro-kernel, and its output preferences. @@ -126,8 +124,6 @@ void bli_trsm4m1_cntx_init( cntx_t* cntx ) void bli_trsm4m1_cntx_finalize( cntx_t* cntx ) { - // Free the context and all memory allocated to it. - bli_cntx_obj_free( cntx ); } // ----------------------------------------------------------------------------- From f1e9be2aba1a057eedb947bbae96848597777408 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Fri, 22 Apr 2016 15:34:02 -0500 Subject: [PATCH 07/14] Minor tweak to test/Makefile. Details: - Just committing a minor change to test/Makefile that has been lingering in my local working copy for longer than I can remember. 
--- test/Makefile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/Makefile b/test/Makefile index fa01c9e79..bbde792dc 100644 --- a/test/Makefile +++ b/test/Makefile @@ -175,7 +175,7 @@ CFLAGS += -I$(BLIS_INC_PATH) -I$(TEST_SRC_PATH) LINKER := $(CC) LDFLAGS := #-L/home/00146/field/gnu/gcc-4.8.2/lib64 -LDFLAGS += -lgfortran -lm -lpthread +LDFLAGS += -lgfortran -lm -lpthread -fopenmp @@ -187,7 +187,8 @@ LDFLAGS += -lgfortran -lm -lpthread # # blis openblas atlas mkl mac essl # -all: blis openblas #atlas mkl +#all: blis openblas atlas mkl +all: blis openblas blis: test_gemv_blis.x \ test_ger_blis.x \ From bdbda6e6acc682ab1b6ca680edebd09ae12a832c Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Mon, 25 Apr 2016 11:05:57 -0500 Subject: [PATCH 08/14] Give the level1v operations some love: - Add missing axpby and xpby operations (plus test cases). - Add special case for scal2v with alpha=1. - Add restrict qualifiers. - Add special-case algorithms for incx=incy=1. --- Makefile | 4 +- frame/1/bli_l1v_check.c | 185 +++++++-- frame/1/bli_l1v_check.h | 42 ++ frame/1/bli_l1v_cntx.c | 64 ++- frame/1/bli_l1v_cntx.h | 2 + frame/1/bli_l1v_ft.h | 35 ++ frame/1/bli_l1v_ker.h | 33 ++ frame/1/bli_l1v_oapi.c | 113 +++++ frame/1/bli_l1v_oapi.h | 29 ++ frame/1/bli_l1v_tapi.c | 74 ++++ frame/1/bli_l1v_tapi.h | 6 + frame/1/kernels/bli_addv_ref.c | 56 ++- frame/1/kernels/bli_axpbyv_ref.c | 248 +++++++++++ frame/1/kernels/bli_axpyv_ref.c | 58 ++- frame/1/kernels/bli_copyv_ref.c | 56 ++- frame/1/kernels/bli_dotv_ref.c | 58 ++- frame/1/kernels/bli_dotxv_ref.c | 62 ++- frame/1/kernels/bli_invertv_ref.c | 26 +- frame/1/kernels/bli_l1v_ref.h | 69 +++- frame/1/kernels/bli_scal2v_ref.c | 75 +++- frame/1/kernels/bli_scalv_ref.c | 28 +- frame/1/kernels/bli_setv_ref.c | 50 ++- frame/1/kernels/bli_subv_ref.c | 52 ++- frame/1/kernels/bli_swapv_ref.c | 32 +- frame/1/kernels/bli_xpbyv_ref.c | 137 ++++++ frame/base/bli_gks.c | 12 + frame/include/bli_kernel_macro_defs.h | 36 ++ 
frame/include/bli_kernel_pre_macro_defs.h | 14 + frame/include/bli_scalar_macro_defs.h | 8 + frame/include/bli_type_defs.h | 4 +- frame/include/level0/bli_axpbyjs.h | 481 ++++++++++++++++++++++ frame/include/level0/bli_axpbys.h | 481 ++++++++++++++++++++++ frame/include/level0/bli_xpbyjs.h | 192 +++++++++ frame/include/level0/ri/bli_axpbyjris.h | 163 ++++++++ frame/include/level0/ri/bli_axpbyris.h | 104 ++++- frame/include/level0/ri/bli_xpbyjris.h | 79 ++++ testsuite/input.operations | 10 + testsuite/src/test_axpbyv.c | 282 +++++++++++++ testsuite/src/test_axpbyv.h | 36 ++ testsuite/src/test_libblis.c | 6 +- testsuite/src/test_libblis.h | 6 +- testsuite/src/test_xpbyv.c | 268 ++++++++++++ testsuite/src/test_xpbyv.h | 36 ++ 43 files changed, 3579 insertions(+), 233 deletions(-) create mode 100644 frame/1/kernels/bli_axpbyv_ref.c create mode 100644 frame/1/kernels/bli_xpbyv_ref.c create mode 100644 frame/include/level0/bli_axpbyjs.h create mode 100644 frame/include/level0/bli_axpbys.h create mode 100644 frame/include/level0/bli_xpbyjs.h create mode 100644 frame/include/level0/ri/bli_axpbyjris.h create mode 100644 frame/include/level0/ri/bli_xpbyjris.h create mode 100644 testsuite/src/test_axpbyv.c create mode 100644 testsuite/src/test_axpbyv.h create mode 100644 testsuite/src/test_xpbyv.c create mode 100644 testsuite/src/test_xpbyv.h diff --git a/Makefile b/Makefile index e52cebd57..5ac386fec 100644 --- a/Makefile +++ b/Makefile @@ -678,11 +678,11 @@ endif cleantest: check-env ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes) - - $(FIND) $(BASE_OBJ_TESTSUITE_PATH) -name "*.o" -name "*.pexe" | $(XARGS) $(RM_F) + - $(FIND) $(BASE_OBJ_TESTSUITE_PATH) \( -name "*.o" -o -name "*.pexe" \) | $(XARGS) $(RM_F) - $(RM_RF) $(TESTSUITE_BIN) else @echo "Removing object files from $(BASE_OBJ_TESTSUITE_PATH)." 
- @- $(FIND) $(BASE_OBJ_TESTSUITE_PATH) -name "*.o" -name "*.pexe" | $(XARGS) $(RM_F) + @- $(FIND) $(BASE_OBJ_TESTSUITE_PATH) \( -name "*.o" -o -name "*.pexe" \) | $(XARGS) $(RM_F) @echo "Removing $(TESTSUITE_BIN) binary." @- $(RM_RF) $(TESTSUITE_BIN) endif diff --git a/frame/1/bli_l1v_check.c b/frame/1/bli_l1v_check.c index b3ac34397..737fbaceb 100644 --- a/frame/1/bli_l1v_check.c +++ b/frame/1/bli_l1v_check.c @@ -66,13 +66,46 @@ void PASTEMAC(opname,_check) \ obj_t* y \ ) \ { \ - bli_l1v_axy_check( alpha, x, y ); \ + bli_l1v_axy_check( alpha, x, y ); \ } GENFRONT( axpyv ) GENFRONT( scal2v ) +#undef GENFRONT +#define GENFRONT( opname ) \ +\ +void PASTEMAC(opname,_check) \ + ( \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + ) \ +{ \ + bli_l1v_xby_check( x, beta, y ); \ +} + +GENFRONT( xpbyv ) + + +#undef GENFRONT +#define GENFRONT( opname ) \ +\ +void PASTEMAC(opname,_check) \ + ( \ + obj_t* alpha, \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + ) \ +{ \ + bli_l1v_axby_check( alpha, x, beta, y ); \ +} + +GENFRONT( axpbyv ) + + #undef GENFRONT #define GENFRONT( opname ) \ \ @@ -182,43 +215,145 @@ void bli_l1v_axy_check obj_t* y ) { - err_t e_val; + err_t e_val; - // Check object datatypes. + // Check object datatypes. - e_val = bli_check_noninteger_object( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_noninteger_object( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_floating_object( x ); - bli_check_error_code( e_val ); + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); - e_val = bli_check_floating_object( y ); - bli_check_error_code( e_val ); + e_val = bli_check_floating_object( y ); + bli_check_error_code( e_val ); - // Check object dimensions. + // Check object dimensions. 
- e_val = bli_check_scalar_object( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_scalar_object( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_vector_object( x ); - bli_check_error_code( e_val ); + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); - e_val = bli_check_vector_object( y ); - bli_check_error_code( e_val ); + e_val = bli_check_vector_object( y ); + bli_check_error_code( e_val ); - e_val = bli_check_equal_vector_lengths( x, y ); - bli_check_error_code( e_val ); + e_val = bli_check_equal_vector_lengths( x, y ); + bli_check_error_code( e_val ); - // Check object buffers (for non-NULLness). + // Check object buffers (for non-NULLness). - e_val = bli_check_object_buffer( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( x ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( x ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( y ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( y ); + bli_check_error_code( e_val ); +} + +void bli_l1v_xby_check + ( + obj_t* x, + obj_t* beta, + obj_t* y + ) +{ + err_t e_val; + + // Check object datatypes. + + e_val = bli_check_noninteger_object( beta ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( y ); + bli_check_error_code( e_val ); + + // Check object dimensions. + + e_val = bli_check_scalar_object( beta ); + bli_check_error_code( e_val ); + + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_vector_object( y ); + bli_check_error_code( e_val ); + + e_val = bli_check_equal_vector_lengths( x, y ); + bli_check_error_code( e_val ); + + // Check object buffers (for non-NULLness). 
+ + e_val = bli_check_object_buffer( beta ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( y ); + bli_check_error_code( e_val ); +} + +void bli_l1v_axby_check + ( + obj_t* alpha, + obj_t* x, + obj_t* beta, + obj_t* y + ) +{ + err_t e_val; + + // Check object datatypes. + + e_val = bli_check_noninteger_object( alpha ); + bli_check_error_code( e_val ); + + e_val = bli_check_noninteger_object( beta ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( y ); + bli_check_error_code( e_val ); + + // Check object dimensions. + + e_val = bli_check_scalar_object( alpha ); + bli_check_error_code( e_val ); + + e_val = bli_check_scalar_object( beta ); + bli_check_error_code( e_val ); + + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_vector_object( y ); + bli_check_error_code( e_val ); + + e_val = bli_check_equal_vector_lengths( x, y ); + bli_check_error_code( e_val ); + + // Check object buffers (for non-NULLness). 
+ + e_val = bli_check_object_buffer( alpha ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( beta ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( y ); + bli_check_error_code( e_val ); } void bli_l1v_dot_check diff --git a/frame/1/bli_l1v_check.h b/frame/1/bli_l1v_check.h index ab3cfeee9..1c87f5f51 100644 --- a/frame/1/bli_l1v_check.h +++ b/frame/1/bli_l1v_check.h @@ -66,6 +66,33 @@ GENTPROT( axpyv ) GENTPROT( scal2v ) +#undef GENTPROT +#define GENTPROT( opname ) \ +\ +void PASTEMAC(opname,_check) \ + ( \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + ); + +GENTPROT( xpbyv ) + + +#undef GENTPROT +#define GENTPROT( opname ) \ +\ +void PASTEMAC(opname,_check) \ + ( \ + obj_t* alpha, \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + ); + +GENTPROT( axpbyv ) + + #undef GENTPROT #define GENTPROT( opname ) \ \ @@ -133,6 +160,21 @@ void bli_l1v_axy_check obj_t* y ); +void bli_l1v_xby_check + ( + obj_t* x, + obj_t* beta, + obj_t* y + ); + +void bli_l1v_axby_check + ( + obj_t* alpha, + obj_t* x, + obj_t* beta, + obj_t* y + ); + void bli_l1v_dot_check ( obj_t* alpha, diff --git a/frame/1/bli_l1v_cntx.c b/frame/1/bli_l1v_cntx.c index 482441451..d1c504528 100644 --- a/frame/1/bli_l1v_cntx.c +++ b/frame/1/bli_l1v_cntx.c @@ -69,21 +69,69 @@ GENFRONT( swapv, BLIS_SWAPV_KER ) \ void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \ { \ - bli_cntx_obj_create( cntx ); \ + bli_cntx_obj_create( cntx ); \ \ - /* Initialize the context with kernel dependencies. */ \ - PASTEMAC(depname,_cntx_init)( cntx ); \ + /* Initialize the context with kernel dependencies. */ \ + PASTEMAC(depname,_cntx_init)( cntx ); \ \ - /* Initialize the context with the kernel associated with the current - operation. */ \ - bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ + /* Initialize the context with the kernel associated with the current + operation. 
*/ \ + bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ } \ void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \ { \ - bli_cntx_obj_free( cntx ); \ + bli_cntx_obj_free( cntx ); \ } GENFRONT( axpyv, BLIS_AXPYV_KER, addv ) -GENFRONT( scal2v, BLIS_SCAL2V_KER, setv ) GENFRONT( scalv, BLIS_SCALV_KER, setv ) + +#undef GENFRONT +#define GENFRONT( opname, kertype, dep1, dep2 ) \ +\ +void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \ +{ \ + bli_cntx_obj_create( cntx ); \ +\ + /* Initialize the context with kernel dependencies. */ \ + PASTEMAC(dep1,_cntx_init)( cntx ); \ + PASTEMAC(dep2,_cntx_init)( cntx ); \ +\ + /* Initialize the context with the kernel associated with the current + operation. */ \ + bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ +} \ +void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \ +{ \ + bli_cntx_obj_free( cntx ); \ +} + +GENFRONT( scal2v, BLIS_SCAL2V_KER, setv, copyv ) +GENFRONT( xpbyv, BLIS_XPBYV_KER, addv, copyv ) + + +#undef GENFRONT +#define GENFRONT( opname, kertype, dep1, dep2, dep3, dep4 ) \ +\ +void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \ +{ \ + bli_cntx_obj_create( cntx ); \ +\ + /* Initialize the context with kernel dependencies. */ \ + PASTEMAC(dep1,_cntx_init)( cntx ); \ + PASTEMAC(dep2,_cntx_init)( cntx ); \ + PASTEMAC(dep3,_cntx_init)( cntx ); \ + PASTEMAC(dep4,_cntx_init)( cntx ); \ +\ + /* Initialize the context with the kernel associated with the current + operation. 
*/ \ + bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ +} \ +void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \ +{ \ + bli_cntx_obj_free( cntx ); \ +} + +GENFRONT( axpbyv, BLIS_AXPBYV_KER, axpyv, xpbyv, scal2v, scalv ) + diff --git a/frame/1/bli_l1v_cntx.h b/frame/1/bli_l1v_cntx.h index 6db0a29c1..a8c16d342 100644 --- a/frame/1/bli_l1v_cntx.h +++ b/frame/1/bli_l1v_cntx.h @@ -44,6 +44,7 @@ void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ); \ void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ); GENPROT( addv ) +GENPROT( axpbyv ) GENPROT( axpyv ) GENPROT( copyv ) GENPROT( dotv ) @@ -54,4 +55,5 @@ GENPROT( scal2v ) GENPROT( setv ) GENPROT( subv ) GENPROT( swapv ) +GENPROT( xpbyv ) diff --git a/frame/1/bli_l1v_ft.h b/frame/1/bli_l1v_ft.h index e206938ce..051ca0f6c 100644 --- a/frame/1/bli_l1v_ft.h +++ b/frame/1/bli_l1v_ft.h @@ -76,6 +76,41 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ INSERT_GENTDEF( axpyv ) INSERT_GENTDEF( scal2v ) +// xpbyv + +#undef GENTDEF +#define GENTDEF( ctype, ch, opname, tsuf ) \ +\ +typedef void (*PASTECH2(ch,opname,tsuf)) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* x, inc_t incx, \ + ctype* beta, \ + ctype* y, inc_t incy, \ + cntx_t* cntx \ + ); + +INSERT_GENTDEF( xpbyv ) + +// axpbyv + +#undef GENTDEF +#define GENTDEF( ctype, ch, opname, tsuf ) \ +\ +typedef void (*PASTECH2(ch,opname,tsuf)) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* alpha, \ + ctype* x, inc_t incx, \ + ctype* beta, \ + ctype* y, inc_t incy, \ + cntx_t* cntx \ + ); + +INSERT_GENTDEF( axpbyv ) + // dotv #undef GENTDEF diff --git a/frame/1/bli_l1v_ker.h b/frame/1/bli_l1v_ker.h index 33cc7e6ae..13c675215 100644 --- a/frame/1/bli_l1v_ker.h +++ b/frame/1/bli_l1v_ker.h @@ -71,6 +71,39 @@ INSERT_GENTPROT_BASIC( axpyv_ker_name ) INSERT_GENTPROT_BASIC( scal2v_ker_name ) +#undef GENTPROT +#define GENTPROT( ctype, ch, opname ) \ +\ +void PASTEMAC(ch,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* x, inc_t incx, \ + ctype* beta, \ + ctype* y, inc_t incy, \ + cntx_t* 
cntx \ + ); \ + +INSERT_GENTPROT_BASIC( xpbyv_ker_name ) + + +#undef GENTPROT +#define GENTPROT( ctype, ch, opname ) \ +\ +void PASTEMAC(ch,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* alpha, \ + ctype* x, inc_t incx, \ + ctype* beta, \ + ctype* y, inc_t incy, \ + cntx_t* cntx \ + ); \ + +INSERT_GENTPROT_BASIC( axpbyv_ker_name ) + + #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ diff --git a/frame/1/bli_l1v_oapi.c b/frame/1/bli_l1v_oapi.c index 6482d5cdf..c43551236 100644 --- a/frame/1/bli_l1v_oapi.c +++ b/frame/1/bli_l1v_oapi.c @@ -136,6 +136,119 @@ GENFRONT( axpyv ) GENFRONT( scal2v ) + +#undef GENFRONT +#define GENFRONT( opname ) \ +\ +void PASTEMAC(opname,EX_SUF) \ + ( \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + BLIS_OAPI_CNTX_PARAM \ + ) \ +{ \ + BLIS_OAPI_CNTX_DECL \ +\ + num_t dt = bli_obj_datatype( *x ); \ +\ + conj_t conjx = bli_obj_conj_status( *x ); \ + dim_t n = bli_obj_vector_dim( *x ); \ + void* buf_x = bli_obj_buffer_at_off( *x ); \ + inc_t inc_x = bli_obj_vector_inc( *x ); \ + void* buf_y = bli_obj_buffer_at_off( *y ); \ + inc_t inc_y = bli_obj_vector_inc( *y ); \ +\ + void* buf_beta; \ +\ + obj_t beta_local; \ +\ + if ( bli_error_checking_is_enabled() ) \ + PASTEMAC(opname,_check)( x, beta, y ); \ +\ + /* Create local copy-casts of scalars (and apply internal conjugation + as needed). */ \ + bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ + beta, &beta_local ); \ + buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ +\ + /* Invoke the void pointer-based function. 
*/ \ + bli_call_ft_8 \ + ( \ + dt, \ + opname, \ + conjx, \ + n, \ + buf_x, inc_x, \ + buf_beta, \ + buf_y, inc_y, \ + cntx \ + ); \ +} + +GENFRONT( xpbyv ) + + + +#undef GENFRONT +#define GENFRONT( opname ) \ +\ +void PASTEMAC(opname,EX_SUF) \ + ( \ + obj_t* alpha, \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + BLIS_OAPI_CNTX_PARAM \ + ) \ +{ \ + BLIS_OAPI_CNTX_DECL \ +\ + num_t dt = bli_obj_datatype( *x ); \ +\ + conj_t conjx = bli_obj_conj_status( *x ); \ + dim_t n = bli_obj_vector_dim( *x ); \ + void* buf_x = bli_obj_buffer_at_off( *x ); \ + inc_t inc_x = bli_obj_vector_inc( *x ); \ + void* buf_y = bli_obj_buffer_at_off( *y ); \ + inc_t inc_y = bli_obj_vector_inc( *y ); \ +\ + void* buf_alpha; \ + void* buf_beta; \ +\ + obj_t alpha_local; \ + obj_t beta_local; \ +\ + if ( bli_error_checking_is_enabled() ) \ + PASTEMAC(opname,_check)( alpha, x, beta, y ); \ +\ + /* Create local copy-casts of scalars (and apply internal conjugation + as needed). */ \ + bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ + alpha, &alpha_local ); \ + bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ + beta, &beta_local ); \ + buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ + buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ +\ + /* Invoke the void pointer-based function. 
*/ \ + bli_call_ft_9 \ + ( \ + dt, \ + opname, \ + conjx, \ + n, \ + buf_alpha, \ + buf_x, inc_x, \ + buf_beta, \ + buf_y, inc_y, \ + cntx \ + ); \ +} + +GENFRONT( axpbyv ) + + #undef GENFRONT #define GENFRONT( opname ) \ \ diff --git a/frame/1/bli_l1v_oapi.h b/frame/1/bli_l1v_oapi.h index 2f4da57d8..b6ec5094d 100644 --- a/frame/1/bli_l1v_oapi.h +++ b/frame/1/bli_l1v_oapi.h @@ -67,6 +67,35 @@ GENTPROT( axpyv ) GENTPROT( scal2v ) +#undef GENTPROT +#define GENTPROT( opname ) \ +\ +void PASTEMAC(opname,EX_SUF) \ + ( \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + BLIS_OAPI_CNTX_PARAM \ + ); + +GENTPROT( xpbyv ) + + +#undef GENTPROT +#define GENTPROT( opname ) \ +\ +void PASTEMAC(opname,EX_SUF) \ + ( \ + obj_t* alpha, \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + BLIS_OAPI_CNTX_PARAM \ + ); + +GENTPROT( axpbyv ) + + #undef GENTPROT #define GENTPROT( opname ) \ \ diff --git a/frame/1/bli_l1v_tapi.c b/frame/1/bli_l1v_tapi.c index af92aa92d..551a41d18 100644 --- a/frame/1/bli_l1v_tapi.c +++ b/frame/1/bli_l1v_tapi.c @@ -111,6 +111,80 @@ INSERT_GENTFUNC_BASIC( axpyv, BLIS_AXPYV_KER ) INSERT_GENTFUNC_BASIC( scal2v, BLIS_SCAL2V_KER ) +#undef GENTFUNC +#define GENTFUNC( ctype, ch, opname, kerid ) \ +\ +void PASTEMAC(ch,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* x, inc_t incx, \ + ctype* beta, \ + ctype* y, inc_t incy, \ + cntx_t* cntx \ + ) \ +{ \ + const num_t dt = PASTEMAC(ch,type); \ + cntx_t* cntx_p; \ +\ + bli_cntx_init_local_if( opname, cntx, cntx_p ); \ +\ + PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \ +\ + f \ + ( \ + conjx, \ + n, \ + x, incx, \ + beta, \ + y, incy, \ + cntx_p \ + ); \ +\ + bli_cntx_finalize_local_if( opname, cntx ); \ +} + +INSERT_GENTFUNC_BASIC( xpbyv, BLIS_XPBYV_KER ) + + +#undef GENTFUNC +#define GENTFUNC( ctype, ch, opname, kerid ) \ +\ +void PASTEMAC(ch,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* alpha, \ + ctype* x, inc_t incx, \ + ctype* beta, \ + ctype* y, inc_t incy, \ + cntx_t* cntx 
\ + ) \ +{ \ + const num_t dt = PASTEMAC(ch,type); \ + cntx_t* cntx_p; \ +\ + bli_cntx_init_local_if( opname, cntx, cntx_p ); \ +\ + PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \ +\ + f \ + ( \ + conjx, \ + n, \ + alpha, \ + x, incx, \ + beta, \ + y, incy, \ + cntx_p \ + ); \ +\ + bli_cntx_finalize_local_if( opname, cntx ); \ +} + +INSERT_GENTFUNC_BASIC( axpbyv, BLIS_AXPBYV_KER ) + + #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ diff --git a/frame/1/bli_l1v_tapi.h b/frame/1/bli_l1v_tapi.h index 618d9a280..b50fe8eb9 100644 --- a/frame/1/bli_l1v_tapi.h +++ b/frame/1/bli_l1v_tapi.h @@ -40,6 +40,9 @@ #undef addv_ker_name #define addv_ker_name addv +#undef axpbyv_ker_name +#define axpbyv_ker_name axpbyv + #undef axpyv_ker_name #define axpyv_ker_name axpyv @@ -70,6 +73,9 @@ #undef swapv_ker_name #define swapv_ker_name swapv +#undef xpbyv_ker_name +#define xpbyv_ker_name xpbyv + // Include the level-1v kernel API template. diff --git a/frame/1/kernels/bli_addv_ref.c b/frame/1/kernels/bli_addv_ref.c index 4a91667a2..c18748ae9 100644 --- a/frame/1/kernels/bli_addv_ref.c +++ b/frame/1/kernels/bli_addv_ref.c @@ -41,13 +41,13 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ - ctype* psi1; \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ dim_t i; \ \ if ( bli_zero_dim1( n ) ) return; \ @@ -57,23 +57,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,addjs)( *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if (incx == 1 && incy == 1) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,addjs)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,addjs)( *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } 
\ } \ else \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,adds)( *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if (incx == 1 && incy == 1) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,adds)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,adds)( *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_axpbyv_ref.c b/frame/1/kernels/bli_axpbyv_ref.c new file mode 100644 index 000000000..91d92c733 --- /dev/null +++ b/frame/1/kernels/bli_axpbyv_ref.c @@ -0,0 +1,248 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +#undef GENTFUNC +#define GENTFUNC( ctype, ch, varname ) \ +\ +void PASTEMAC(ch,varname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ + ) \ +{ \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ + dim_t i; \ +\ + if ( bli_zero_dim1( n ) ) return; \ +\ + if ( PASTEMAC(ch,eq0)( *alpha ) ) \ + { \ +\ + /* If alpha is zero and beta is zero, set to zero. */ \ + if ( PASTEMAC(ch,eq0)( *beta ) ) \ + { \ + ctype* zero = PASTEMAC(ch,0); \ +\ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,setv_ft) setv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SETV_KER, cntx ); \ +\ + setv_p \ + ( \ + BLIS_NO_CONJUGATE, \ + n, \ + zero, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If alpha is zero and beta is one, return. */ \ + else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + { \ + return; \ + } \ + /* If alpha is zero, scale by beta. */ \ + else \ + { \ + /* Query the context for the kernel function pointer. 
*/ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,scalv_ft) scalv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCALV_KER, cntx ); \ +\ + scalv_p \ + ( \ + BLIS_NO_CONJUGATE, \ + n, \ + beta, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ +\ + } \ + else if ( PASTEMAC(ch,eq1)( *alpha ) ) \ + { \ + \ + /* If alpha is one and beta is zero, copy. */ \ + if ( PASTEMAC(ch,eq0)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ + \ + copyv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If alpha is one and beta is one, add. */ \ + else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,addv_ft) addv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_ADDV_KER, cntx ); \ + \ + addv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If alpha is one, call xpby. */ \ + else \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,xpbyv_ft) xpbyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_XPBYV_KER, cntx ); \ + \ + xpbyv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + beta, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + \ + } \ + else \ + { \ + \ + /* If beta is zero, call scal2. */ \ + if ( PASTEMAC(ch,eq0)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,scal2v_ft) scal2v_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCAL2V_KER, cntx ); \ + \ + scal2v_p \ + ( \ + conjx, \ + n, \ + alpha, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If beta is one, call axpy. */ \ + else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. 
*/ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,axpyv_ft) axpyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ + \ + axpyv_p \ + ( \ + conjx, \ + n, \ + alpha, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + \ + } \ +\ + chi1 = x; \ + psi1 = y; \ +\ + if ( bli_is_conj( conjx ) ) \ + { \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpbyjs)( *alpha, chi1[i], *beta, psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpbyjs)( *alpha, *chi1, *beta, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ + } \ + else \ + { \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpbys)( *alpha, chi1[i], *beta, psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpbys)( *alpha, *chi1, *beta, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ + } \ +} + +INSERT_GENTFUNC_BASIC0( axpbyv_ref ) + diff --git a/frame/1/kernels/bli_axpyv_ref.c b/frame/1/kernels/bli_axpyv_ref.c index 4b29505cf..a6dd49976 100644 --- a/frame/1/kernels/bli_axpyv_ref.c +++ b/frame/1/kernels/bli_axpyv_ref.c @@ -41,14 +41,14 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ - ctype* psi1; \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ dim_t i; \ \ if ( bli_zero_dim1( n ) ) return; \ @@ -79,23 +79,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpyjs)( *alpha, *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpyjs)( *alpha, chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; 
++i ) \ + { \ + PASTEMAC(ch,axpyjs)( *alpha, *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpys)( *alpha, *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpys)( *alpha, chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpys)( *alpha, *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_copyv_ref.c b/frame/1/kernels/bli_copyv_ref.c index b852f76e7..28d22c7b2 100644 --- a/frame/1/kernels/bli_copyv_ref.c +++ b/frame/1/kernels/bli_copyv_ref.c @@ -41,13 +41,13 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ - ctype* psi1; \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ dim_t i; \ \ if ( bli_zero_dim1( n ) ) return; \ @@ -57,23 +57,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copyjs)( *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copyjs)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copyjs)( *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copys)( *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copys)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copys)( *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } 
\ } diff --git a/frame/1/kernels/bli_dotv_ref.c b/frame/1/kernels/bli_dotv_ref.c index b17480b07..6f790c81c 100644 --- a/frame/1/kernels/bli_dotv_ref.c +++ b/frame/1/kernels/bli_dotv_ref.c @@ -42,14 +42,14 @@ void PASTEMAC(ch,varname) \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* rho, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict rho, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ - ctype* psi1; \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ ctype dotxy; \ dim_t i; \ conj_t conjx_use; \ @@ -75,23 +75,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx_use ) ) \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dotjs)( chi1[i], psi1[i], dotxy ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dots)( chi1[i], psi1[i], dotxy ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ \ if ( bli_is_conj( conjy ) ) \ diff --git a/frame/1/kernels/bli_dotxv_ref.c b/frame/1/kernels/bli_dotxv_ref.c index b611533d4..89dedcc48 100644 --- a/frame/1/kernels/bli_dotxv_ref.c +++ b/frame/1/kernels/bli_dotxv_ref.c @@ -42,16 +42,16 @@ void PASTEMAC(ch,varname) \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* beta, \ - ctype* rho, \ + ctype* restrict alpha, \ + ctype* restrict x, 
inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict beta, \ + ctype* restrict rho, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ - ctype* psi1; \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ ctype dotxy; \ dim_t i; \ conj_t conjx_use; \ @@ -83,23 +83,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx_use ) ) \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dotjs)( chi1[i], psi1[i], dotxy ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dots)( chi1[i], psi1[i], dotxy ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ \ if ( bli_is_conj( conjy ) ) \ diff --git a/frame/1/kernels/bli_invertv_ref.c b/frame/1/kernels/bli_invertv_ref.c index c7f3dbcb7..a79d8c9f0 100644 --- a/frame/1/kernels/bli_invertv_ref.c +++ b/frame/1/kernels/bli_invertv_ref.c @@ -40,23 +40,33 @@ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ - ctype* x, inc_t incx, \ + ctype* restrict x, inc_t incx, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ + ctype* restrict chi1; \ dim_t i; \ \ if ( bli_zero_dim1( n ) ) return; \ \ chi1 = x; \ \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,inverts)( *chi1 ); \ -\ - chi1 += incx; \ - } \ + if ( incx == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,inverts)( chi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,inverts)( *chi1 ); \ + \ + chi1 += incx; \ + } 
\ + } \ } INSERT_GENTFUNC_BASIC0( invertv_ref ) diff --git a/frame/1/kernels/bli_l1v_ref.h b/frame/1/kernels/bli_l1v_ref.h index f3857d841..60cdbd2ee 100644 --- a/frame/1/kernels/bli_l1v_ref.h +++ b/frame/1/kernels/bli_l1v_ref.h @@ -40,8 +40,8 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); @@ -57,9 +57,9 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); @@ -67,6 +67,39 @@ INSERT_GENTPROT_BASIC( axpyv_ref ) INSERT_GENTPROT_BASIC( scal2v_ref ) +#undef GENTPROT +#define GENTPROT( ctype, ch, varname ) \ +\ +void PASTEMAC(ch,varname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ + ); + +INSERT_GENTPROT_BASIC( xpbyv_ref ) + + +#undef GENTPROT +#define GENTPROT( ctype, ch, varname ) \ +\ +void PASTEMAC(ch,varname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ + ); + +INSERT_GENTPROT_BASIC( axpbyv_ref ) + + #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ @@ -75,9 +108,9 @@ void PASTEMAC(ch,varname) \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* rho, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict rho, \ cntx_t* cntx \ ); @@ -92,11 +125,11 @@ void PASTEMAC(ch,varname) \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* beta, \ - ctype* rho, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* 
restrict y, inc_t incy, \ + ctype* restrict beta, \ + ctype* restrict rho, \ cntx_t* cntx \ ); @@ -109,7 +142,7 @@ INSERT_GENTPROT_BASIC( dotxv_ref ) void PASTEMAC(ch,varname) \ ( \ dim_t n, \ - ctype* x, inc_t incx, \ + ctype* restrict x, inc_t incx, \ cntx_t* cntx \ ); @@ -123,8 +156,8 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjalpha, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ cntx_t* cntx \ ); @@ -138,8 +171,8 @@ INSERT_GENTPROT_BASIC( setv_ref ) void PASTEMAC(ch,varname) \ ( \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); diff --git a/frame/1/kernels/bli_scal2v_ref.c b/frame/1/kernels/bli_scal2v_ref.c index 3f739cd90..f4bc0d541 100644 --- a/frame/1/kernels/bli_scal2v_ref.c +++ b/frame/1/kernels/bli_scal2v_ref.c @@ -41,14 +41,14 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ - ctype* psi1; \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ dim_t i; \ \ if ( bli_zero_dim1( n ) ) return; \ @@ -72,29 +72,66 @@ void PASTEMAC(ch,varname) \ ); \ return; \ } \ + /* If alpha is one, use copyv. */ \ + else if ( PASTEMAC(ch,eq0)( *alpha ) ) \ + { \ + /* Query the context for the kernel function pointer. 
*/ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ +\ + copyv_p \ + ( \ + BLIS_NO_CONJUGATE, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ \ chi1 = x; \ psi1 = y; \ \ if ( bli_is_conj( conjx ) ) \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scal2js)( *alpha, *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scal2js)( *alpha, chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scal2js)( *alpha, *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scal2s)( *alpha, *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scal2s)( *alpha, chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scal2s)( *alpha, *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_scalv_ref.c b/frame/1/kernels/bli_scalv_ref.c index 982313c9b..7d238c6f4 100644 --- a/frame/1/kernels/bli_scalv_ref.c +++ b/frame/1/kernels/bli_scalv_ref.c @@ -41,12 +41,12 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjalpha, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ + ctype* restrict chi1; \ ctype alpha_conj; \ dim_t i; \ \ @@ -79,12 +79,22 @@ void PASTEMAC(ch,varname) \ \ chi1 = x; \ \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scals)( alpha_conj, *chi1 ); \ -\ - chi1 += incx; \ - } \ + if ( incx == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scals)( alpha_conj, chi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + 
PASTEMAC(ch,scals)( alpha_conj, *chi1 ); \ + \ + chi1 += incx; \ + } \ + } \ } INSERT_GENTFUNC_BASIC0( scalv_ref ) diff --git a/frame/1/kernels/bli_setv_ref.c b/frame/1/kernels/bli_setv_ref.c index f01364339..4f7d1db2d 100644 --- a/frame/1/kernels/bli_setv_ref.c +++ b/frame/1/kernels/bli_setv_ref.c @@ -41,12 +41,12 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjalpha, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ + ctype* restrict chi1; \ ctype alpha_conj; \ dim_t i; \ \ @@ -56,23 +56,43 @@ void PASTEMAC(ch,varname) \ \ if ( PASTEMAC(ch,eq0)( *alpha ) ) \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,set0s)( *chi1 ); \ -\ - chi1 += incx; \ - } \ + if ( incx == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,set0s)( chi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,set0s)( *chi1 ); \ + \ + chi1 += incx; \ + } \ + } \ } \ else \ { \ PASTEMAC(ch,copycjs)( conjalpha, *alpha, alpha_conj ); \ \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copys)( alpha_conj, *chi1 ); \ -\ - chi1 += incx; \ - } \ + if ( incx == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copys)( alpha_conj, chi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copys)( alpha_conj, *chi1 ); \ + \ + chi1 += incx; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_subv_ref.c b/frame/1/kernels/bli_subv_ref.c index eca8f36dc..bc59b01b6 100644 --- a/frame/1/kernels/bli_subv_ref.c +++ b/frame/1/kernels/bli_subv_ref.c @@ -41,13 +41,13 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ - ctype* psi1; \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ dim_t i; \ \ if ( bli_zero_dim1( n ) ) return; \ @@ -57,23 
+57,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,subjs)( *chi1, *psi1 ); \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,subjs)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,subjs)( *chi1, *psi1 ); \ \ - chi1 += incx; \ - psi1 += incy; \ - } \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,subs)( *chi1, *psi1 ); \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,subs)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,subs)( *chi1, *psi1 ); \ \ - chi1 += incx; \ - psi1 += incy; \ - } \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_swapv_ref.c b/frame/1/kernels/bli_swapv_ref.c index 8fe4a4b9a..6a43fb0d0 100644 --- a/frame/1/kernels/bli_swapv_ref.c +++ b/frame/1/kernels/bli_swapv_ref.c @@ -40,13 +40,13 @@ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ - ctype* psi1; \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ dim_t i; \ \ if ( bli_zero_dim1( n ) ) return; \ @@ -54,13 +54,23 @@ void PASTEMAC(ch,varname) \ chi1 = x; \ psi1 = y; \ \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,swaps)( *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,swaps)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,swaps)( *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } INSERT_GENTFUNC_BASIC0( swapv_ref ) diff --git a/frame/1/kernels/bli_xpbyv_ref.c b/frame/1/kernels/bli_xpbyv_ref.c new 
file mode 100644 index 000000000..508e06ce8 --- /dev/null +++ b/frame/1/kernels/bli_xpbyv_ref.c @@ -0,0 +1,137 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "blis.h" + +#undef GENTFUNC +#define GENTFUNC( ctype, ch, varname ) \ +\ +void PASTEMAC(ch,varname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ + ) \ +{ \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ + dim_t i; \ +\ + if ( bli_zero_dim1( n ) ) return; \ +\ + /* If beta is zero, use copyv. */ \ + if ( PASTEMAC(ch,eq0)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ +\ + copyv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If beta is one, use addv. */ \ + else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,addv_ft) addv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_ADDV_KER, cntx ); \ +\ + addv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ +\ + chi1 = x; \ + psi1 = y; \ +\ + if ( bli_is_conj( conjx ) ) \ + { \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,xpbyjs)( chi1[i], *beta, psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,xpbyjs)( *chi1, *beta, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ + } \ + else \ + { \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,xpbys)( chi1[i], *beta, psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,xpbys)( *chi1, *beta, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ + } \ +} + +INSERT_GENTFUNC_BASIC0( xpbyv_ref ) + diff --git a/frame/base/bli_gks.c b/frame/base/bli_gks.c index 1368d8846..74e9dde11 100644 --- a/frame/base/bli_gks.c +++ b/frame/base/bli_gks.c @@
-754,6 +754,9 @@ static func_t bli_gks_l1v_kers[BLIS_NUM_LEVEL1V_KERS] = /* addv */ { { BLIS_SADDV_KERNEL, BLIS_CADDV_KERNEL, BLIS_DADDV_KERNEL, BLIS_ZADDV_KERNEL, } }, +/* axpbyv */ { { BLIS_SAXPBYV_KERNEL, BLIS_CAXPBYV_KERNEL, + BLIS_DAXPBYV_KERNEL, BLIS_ZAXPBYV_KERNEL, } + }, /* axpyv */ { { BLIS_SAXPYV_KERNEL, BLIS_CAXPYV_KERNEL, BLIS_DAXPYV_KERNEL, BLIS_ZAXPYV_KERNEL, } }, @@ -784,6 +787,9 @@ static func_t bli_gks_l1v_kers[BLIS_NUM_LEVEL1V_KERS] = /* swapv */ { { BLIS_SSWAPV_KERNEL, BLIS_CSWAPV_KERNEL, BLIS_DSWAPV_KERNEL, BLIS_ZSWAPV_KERNEL, } }, +/* xpbyv */ { { BLIS_SXPBYV_KERNEL, BLIS_CXPBYV_KERNEL, + BLIS_DXPBYV_KERNEL, BLIS_ZXPBYV_KERNEL, } + }, }; static func_t bli_gks_l1v_ref_kers[BLIS_NUM_LEVEL1V_KERS] = @@ -792,6 +798,9 @@ static func_t bli_gks_l1v_ref_kers[BLIS_NUM_LEVEL1V_KERS] = /* addv */ { { BLIS_SADDV_KERNEL_REF, BLIS_CADDV_KERNEL_REF, BLIS_DADDV_KERNEL_REF, BLIS_ZADDV_KERNEL_REF, } }, +/* axpbyv */ { { BLIS_SAXPBYV_KERNEL_REF, BLIS_CAXPBYV_KERNEL_REF, + BLIS_DAXPBYV_KERNEL_REF, BLIS_ZAXPBYV_KERNEL_REF, } + }, /* axpyv */ { { BLIS_SAXPYV_KERNEL_REF, BLIS_CAXPYV_KERNEL_REF, BLIS_DAXPYV_KERNEL_REF, BLIS_ZAXPYV_KERNEL_REF, } }, @@ -822,6 +831,9 @@ static func_t bli_gks_l1v_ref_kers[BLIS_NUM_LEVEL1V_KERS] = /* swapv */ { { BLIS_SSWAPV_KERNEL_REF, BLIS_CSWAPV_KERNEL_REF, BLIS_DSWAPV_KERNEL_REF, BLIS_ZSWAPV_KERNEL_REF, } }, +/* xpbyv */ { { BLIS_SXPBYV_KERNEL_REF, BLIS_CXPBYV_KERNEL_REF, + BLIS_DXPBYV_KERNEL_REF, BLIS_ZXPBYV_KERNEL_REF, } + }, }; // ----------------------------------------------------------------------------- diff --git a/frame/include/bli_kernel_macro_defs.h b/frame/include/bli_kernel_macro_defs.h index 9f3643a90..1c599a20e 100644 --- a/frame/include/bli_kernel_macro_defs.h +++ b/frame/include/bli_kernel_macro_defs.h @@ -629,6 +629,24 @@ #define BLIS_ZADDV_KERNEL BLIS_ZADDV_KERNEL_REF #endif +// axpbyv kernels + +#ifndef BLIS_SAXPBYV_KERNEL +#define BLIS_SAXPBYV_KERNEL BLIS_SAXPBYV_KERNEL_REF +#endif + +#ifndef BLIS_DAXPBYV_KERNEL 
+#define BLIS_DAXPBYV_KERNEL BLIS_DAXPBYV_KERNEL_REF +#endif + +#ifndef BLIS_CAXPBYV_KERNEL +#define BLIS_CAXPBYV_KERNEL BLIS_CAXPBYV_KERNEL_REF +#endif + +#ifndef BLIS_ZAXPBYV_KERNEL +#define BLIS_ZAXPBYV_KERNEL BLIS_ZAXPBYV_KERNEL_REF +#endif + // axpyv kernels #ifndef BLIS_SAXPYV_KERNEL @@ -809,6 +827,24 @@ #define BLIS_ZSWAPV_KERNEL BLIS_ZSWAPV_KERNEL_REF #endif +// xpbyv kernels + +#ifndef BLIS_SXPBYV_KERNEL +#define BLIS_SXPBYV_KERNEL BLIS_SXPBYV_KERNEL_REF +#endif + +#ifndef BLIS_DXPBYV_KERNEL +#define BLIS_DXPBYV_KERNEL BLIS_DXPBYV_KERNEL_REF +#endif + +#ifndef BLIS_CXPBYV_KERNEL +#define BLIS_CXPBYV_KERNEL BLIS_CXPBYV_KERNEL_REF +#endif + +#ifndef BLIS_ZXPBYV_KERNEL +#define BLIS_ZXPBYV_KERNEL BLIS_ZXPBYV_KERNEL_REF +#endif + // -- Define default blocksize macros ------------------------------------------ diff --git a/frame/include/bli_kernel_pre_macro_defs.h b/frame/include/bli_kernel_pre_macro_defs.h index 703f8c54f..10f27ac54 100644 --- a/frame/include/bli_kernel_pre_macro_defs.h +++ b/frame/include/bli_kernel_pre_macro_defs.h @@ -260,6 +260,13 @@ #define BLIS_CADDV_KERNEL_REF bli_caddv_ref #define BLIS_ZADDV_KERNEL_REF bli_zaddv_ref +// axpbyv kernels + +#define BLIS_SAXPBYV_KERNEL_REF bli_saxpbyv_ref +#define BLIS_DAXPBYV_KERNEL_REF bli_daxpbyv_ref +#define BLIS_CAXPBYV_KERNEL_REF bli_caxpbyv_ref +#define BLIS_ZAXPBYV_KERNEL_REF bli_zaxpbyv_ref + // axpyv kernels #define BLIS_SAXPYV_KERNEL_REF bli_saxpyv_ref @@ -330,6 +337,13 @@ #define BLIS_CSWAPV_KERNEL_REF bli_cswapv_ref #define BLIS_ZSWAPV_KERNEL_REF bli_zswapv_ref +// xpbyv kernels + +#define BLIS_SXPBYV_KERNEL_REF bli_sxpbyv_ref +#define BLIS_DXPBYV_KERNEL_REF bli_dxpbyv_ref +#define BLIS_CXPBYV_KERNEL_REF bli_cxpbyv_ref +#define BLIS_ZXPBYV_KERNEL_REF bli_zxpbyv_ref + #endif diff --git a/frame/include/bli_scalar_macro_defs.h b/frame/include/bli_scalar_macro_defs.h index 32258e86e..8104e5d4f 100644 --- a/frame/include/bli_scalar_macro_defs.h +++ b/frame/include/bli_scalar_macro_defs.h @@ -75,6 
+75,9 @@ #include "bli_add3ris.h" +#include "bli_axpbyris.h" +#include "bli_axpbyjris.h" + #include "bli_axpyris.h" #include "bli_axpyjris.h" @@ -112,6 +115,7 @@ #include "bli_swapris.h" #include "bli_xpbyris.h" +#include "bli_xpbyjris.h" // Inlined scalar macros in loops #include "bli_scalris_mxn_uplo.h" @@ -128,6 +132,9 @@ #include "bli_add3s.h" +#include "bli_axpbys.h" +#include "bli_axpbyjs.h" + #include "bli_axpys.h" #include "bli_axpyjs.h" @@ -178,6 +185,7 @@ #include "bli_swaps.h" #include "bli_xpbys.h" +#include "bli_xpbyjs.h" // Inlined scalar macros in loops #include "bli_adds_mxn.h" diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 2efaedf9e..5f921b79d 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -785,6 +785,7 @@ typedef enum typedef enum { BLIS_ADDV_KER = 0, + BLIS_AXPBYV_KER, BLIS_AXPYV_KER, BLIS_COPYV_KER, BLIS_DOTV_KER, @@ -795,9 +796,10 @@ typedef enum BLIS_SETV_KER, BLIS_SUBV_KER, BLIS_SWAPV_KER, + BLIS_XPBYV_KER, } l1vkr_t; -#define BLIS_NUM_LEVEL1V_KERS 11 +#define BLIS_NUM_LEVEL1V_KERS 13 typedef enum { diff --git a/frame/include/level0/bli_axpbyjs.h b/frame/include/level0/bli_axpbyjs.h new file mode 100644 index 000000000..3d0a663b2 --- /dev/null +++ b/frame/include/level0/bli_axpbyjs.h @@ -0,0 +1,481 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_AXPBYJS_H +#define BLIS_AXPBYJS_H + +// axpbyjs + +// Notes: +// - The first char encodes the type of a. +// - The second char encodes the type of x. +// - The third char encodes the type of b. +// - The fourth char encodes the type of y. 
+ + +// -- (axby) = (???s) ---------------------------------------------------------- + +#define bli_ssssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dsssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_csssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zsssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sdssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ddssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cdssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zdssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_scssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dcssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ccssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zcssaxpbyjs( a, x, b, y ) bli_saxpbyjris( 
bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_szssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zzssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_ssdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dsdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_csdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zsdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sddsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dddsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cddsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zddsaxpbyjs( a, x, b, y 
) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_scdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dcdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ccdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zcdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_szdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zzdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_sscsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dscsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cscsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define 
bli_zscsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sdcsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ddcsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cdcsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zdcsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sccsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dccsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cccsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zccsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_szcsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzcsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czcsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), 
bli_simag(y) ) +#define bli_zzcsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_sszsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dszsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cszsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zszsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sdzsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ddzsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cdzsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zdzsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sczsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dczsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cczsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), 
bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zczsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_szzsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzzsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czzsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zzzsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) + +// -- (axby) = (???d) ---------------------------------------------------------- + +#define bli_sssdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dssdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cssdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zssdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sdsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ddsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define 
bli_cdsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zdsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_scsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dcsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ccsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zcsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_szsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dzsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_czsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zzsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) + +#define bli_ssddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dsddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), 
bli_dimag(y) ) +#define bli_csddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zsddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sdddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ddddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cdddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zdddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_scddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dcddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ccddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zcddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_szddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dzddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), 
bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_czddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zzddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) + +#define bli_sscdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dscdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cscdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zscdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sdcdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ddcdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cdcdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zdcdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sccdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dccdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), 
bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cccdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zccdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_szcdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dzcdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_czcdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zzcdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) + +#define bli_sszdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dszdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cszdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zszdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sdzdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ddzdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), 
bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cdzdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zdzdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sczdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dczdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cczdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zczdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_szzdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dzzdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_czzdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zzzdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) + +#ifndef BLIS_ENABLE_C99_COMPLEX + +// -- (axby) = (???c) ---------------------------------------------------------- + +#define bli_ssscaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), 
bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dsscaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_csscaxpbyjs( a, x, b, y ) bli_cssaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zsscaxpbyjs( a, x, b, y ) bli_cssaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sdscaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ddscaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cdscaxpbyjs( a, x, b, y ) bli_cssaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zdscaxpbyjs( a, x, b, y ) bli_cssaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_scscaxpbyjs( a, x, b, y ) bli_scsaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dcscaxpbyjs( a, x, b, y ) bli_scsaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ccscaxpbyjs( a, x, b, y ) bli_ccsaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zcscaxpbyjs( a, x, b, y ) bli_ccsaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_szscaxpbyjs( a, x, b, y ) bli_scsaxpbyjris( 
bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dzscaxpbyjs( a, x, b, y ) bli_scsaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_czscaxpbyjs( a, x, b, y ) bli_ccsaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zzscaxpbyjs( a, x, b, y ) bli_ccsaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) + +#define bli_ssdcaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dsdcaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_csdcaxpbyjs( a, x, b, y ) bli_cssaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zsdcaxpbyjs( a, x, b, y ) bli_cssaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sddcaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dddcaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cddcaxpbyjs( a, x, b, y ) bli_cssaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zddcaxpbyjs( a, x, b, y ) bli_cssaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define 
bli_scdcaxpbyjs( a, x, b, y ) bli_scsaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dcdcaxpbyjs( a, x, b, y ) bli_scsaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ccdcaxpbyjs( a, x, b, y ) bli_ccsaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zcdcaxpbyjs( a, x, b, y ) bli_ccsaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_szdcaxpbyjs( a, x, b, y ) bli_scsaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dzdcaxpbyjs( a, x, b, y ) bli_scsaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_czdcaxpbyjs( a, x, b, y ) bli_ccsaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zzdcaxpbyjs( a, x, b, y ) bli_ccsaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) + +#define bli_ssccaxpbyjs( a, x, b, y ) bli_sscaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dsccaxpbyjs( a, x, b, y ) bli_sscaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_csccaxpbyjs( a, x, b, y ) bli_cscaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zsccaxpbyjs( a, x, b, y ) bli_cscaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), 
bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sdccaxpbyjs( a, x, b, y ) bli_sscaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ddccaxpbyjs( a, x, b, y ) bli_sscaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cdccaxpbyjs( a, x, b, y ) bli_cscaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zdccaxpbyjs( a, x, b, y ) bli_cscaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_scccaxpbyjs( a, x, b, y ) bli_sccaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dcccaxpbyjs( a, x, b, y ) bli_sccaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ccccaxpbyjs( a, x, b, y ) bli_caxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zcccaxpbyjs( a, x, b, y ) bli_caxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_szccaxpbyjs( a, x, b, y ) bli_sccaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dzccaxpbyjs( a, x, b, y ) bli_sccaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_czccaxpbyjs( a, x, b, y ) bli_caxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zzccaxpbyjs( a, x, b, y ) bli_caxpbyjris( bli_zreal(a), bli_zimag(a), 
bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) + +#define bli_sszcaxpbyjs( a, x, b, y ) bli_sscaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dszcaxpbyjs( a, x, b, y ) bli_sscaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cszcaxpbyjs( a, x, b, y ) bli_cscaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zszcaxpbyjs( a, x, b, y ) bli_cscaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sdzcaxpbyjs( a, x, b, y ) bli_sscaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ddzcaxpbyjs( a, x, b, y ) bli_sscaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cdzcaxpbyjs( a, x, b, y ) bli_cscaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zdzcaxpbyjs( a, x, b, y ) bli_cscaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sczcaxpbyjs( a, x, b, y ) bli_sccaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dczcaxpbyjs( a, x, b, y ) bli_sccaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cczcaxpbyjs( a, x, b, y ) bli_caxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zczcaxpbyjs( a, x, b, y ) 
bli_caxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_szzcaxpbyjs( a, x, b, y ) bli_sccaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dzzcaxpbyjs( a, x, b, y ) bli_sccaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_czzcaxpbyjs( a, x, b, y ) bli_caxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zzzcaxpbyjs( a, x, b, y ) bli_caxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) + +// -- (axby) = (???z) ---------------------------------------------------------- + +#define bli_ssszaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dsszaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_csszaxpbyjs( a, x, b, y ) bli_zddaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zsszaxpbyjs( a, x, b, y ) bli_zddaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sdszaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ddszaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cdszaxpbyjs( a, x, b, y ) bli_zddaxpbyjris( bli_creal(a), bli_cimag(a), 
bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zdszaxpbyjs( a, x, b, y ) bli_zddaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_scszaxpbyjs( a, x, b, y ) bli_dzdaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dcszaxpbyjs( a, x, b, y ) bli_dzdaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ccszaxpbyjs( a, x, b, y ) bli_zzdaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zcszaxpbyjs( a, x, b, y ) bli_zzdaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_szszaxpbyjs( a, x, b, y ) bli_dzdaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dzszaxpbyjs( a, x, b, y ) bli_dzdaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_czszaxpbyjs( a, x, b, y ) bli_zzdaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zzszaxpbyjs( a, x, b, y ) bli_zzdaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) + +#define bli_ssdzaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dsdzaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_csdzaxpbyjs( a, x, b, y ) 
bli_zddaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zsdzaxpbyjs( a, x, b, y ) bli_zddaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sddzaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dddzaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cddzaxpbyjs( a, x, b, y ) bli_zddaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zddzaxpbyjs( a, x, b, y ) bli_zddaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_scdzaxpbyjs( a, x, b, y ) bli_dzdaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dcdzaxpbyjs( a, x, b, y ) bli_dzdaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ccdzaxpbyjs( a, x, b, y ) bli_zzdaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zcdzaxpbyjs( a, x, b, y ) bli_zzdaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_szdzaxpbyjs( a, x, b, y ) bli_dzdaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dzdzaxpbyjs( a, x, b, y ) bli_dzdaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) 
+#define bli_czdzaxpbyjs( a, x, b, y ) bli_zzdaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zzdzaxpbyjs( a, x, b, y ) bli_zzdaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) + +#define bli_ssczaxpbyjs( a, x, b, y ) bli_ddzaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dsczaxpbyjs( a, x, b, y ) bli_ddzaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_csczaxpbyjs( a, x, b, y ) bli_zdzaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zsczaxpbyjs( a, x, b, y ) bli_zdzaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sdczaxpbyjs( a, x, b, y ) bli_ddzaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ddczaxpbyjs( a, x, b, y ) bli_ddzaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cdczaxpbyjs( a, x, b, y ) bli_zdzaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zdczaxpbyjs( a, x, b, y ) bli_zdzaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_scczaxpbyjs( a, x, b, y ) bli_dzzaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dcczaxpbyjs( a, x, b, y ) bli_dzzaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), 
bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ccczaxpbyjs( a, x, b, y ) bli_zaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zcczaxpbyjs( a, x, b, y ) bli_zaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_szczaxpbyjs( a, x, b, y ) bli_dzzaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dzczaxpbyjs( a, x, b, y ) bli_dzzaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_czczaxpbyjs( a, x, b, y ) bli_zaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zzczaxpbyjs( a, x, b, y ) bli_zaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) + +#define bli_sszzaxpbyjs( a, x, b, y ) bli_ddzaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dszzaxpbyjs( a, x, b, y ) bli_ddzaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cszzaxpbyjs( a, x, b, y ) bli_zdzaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zszzaxpbyjs( a, x, b, y ) bli_zdzaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sdzzaxpbyjs( a, x, b, y ) bli_ddzaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ddzzaxpbyjs( a, x, b, y ) bli_ddzaxpbyjris( bli_dreal(a), bli_dimag(a), 
bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cdzzaxpbyjs( a, x, b, y ) bli_zdzaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zdzzaxpbyjs( a, x, b, y ) bli_zdzaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sczzaxpbyjs( a, x, b, y ) bli_dzzaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dczzaxpbyjs( a, x, b, y ) bli_dzzaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cczzaxpbyjs( a, x, b, y ) bli_zaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zczzaxpbyjs( a, x, b, y ) bli_zaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_szzzaxpbyjs( a, x, b, y ) bli_dzzaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dzzzaxpbyjs( a, x, b, y ) bli_dzzaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_czzzaxpbyjs( a, x, b, y ) bli_zaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zzzzaxpbyjs( a, x, b, y ) bli_zaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) + +#else // ifdef BLIS_ENABLE_C99_COMPLEX + +// -- (axby) = (???c): y = a * conj(x) + b * y, written in place; conjf() for c-typed x, conj() for z-typed x, real (s/d) x is used as-is -- + +#define bli_ssscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define
bli_dsscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scscaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_dcscaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_ccscaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_zcscaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_szscaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_dzscaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_czscaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_zzscaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } + +#define bli_ssdcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsdcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csdcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsdcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sddcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dddcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cddcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zddcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scdcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_dcdcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_ccdcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_zcdcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } 
+#define bli_szdcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_dzdcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_czdcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_zzdcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } + +#define bli_ssccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scccaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_dcccaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_ccccaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_zcccaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_szccaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_dzccaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_czccaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_zzccaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } + +#define bli_sszcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dszcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cszcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zszcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdzcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddzcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdzcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } 
+#define bli_zdzcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sczcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_dczcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_cczcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_zczcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_szzcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_dzzcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_czzcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_zzzcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } + +// -- (axby) = (???z): y = a * conj(x) + b * y, written in place; conjf() for c-typed x, conj() for z-typed x, real (s/d) x is used as-is -- + +#define bli_ssszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scszaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_dcszaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_ccszaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_zcszaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_szszaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_dzszaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_czszaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_zzszaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } + +#define bli_ssdzaxpbyjs( a, x, b, y )
{ (y) = (a) * (x) + (b) * (y); } +#define bli_dsdzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csdzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsdzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sddzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dddzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cddzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zddzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scdzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_dcdzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_ccdzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_zcdzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_szdzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_dzdzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_czdzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_zzdzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } + +#define bli_ssczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scczaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_dcczaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_ccczaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_zcczaxpbyjs( a, x, b, y ) { 
(y) = (a) * conjf(x) + (b) * (y); } +#define bli_szczaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_dzczaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_czczaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_zzczaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } + +#define bli_sszzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dszzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cszzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zszzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdzzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddzzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdzzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdzzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sczzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_dczzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_cczzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_zczzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_szzzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_dzzzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_czzzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_zzzzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } + +#endif // BLIS_ENABLE_C99_COMPLEX + + +#define bli_saxpbyjs( a, x, b, y ) bli_ssssaxpbyjs( a, x, b, y ) +#define bli_daxpbyjs( a, x, b, y ) bli_ddddaxpbyjs( a, x, b, y ) +#define bli_caxpbyjs( a, x, b, y ) bli_ccccaxpbyjs( a, x, b, y ) +#define bli_zaxpbyjs( a, x, b, y ) bli_zzzzaxpbyjs( a, x, b, y ) + + +#endif + diff --git a/frame/include/level0/bli_axpbys.h b/frame/include/level0/bli_axpbys.h new file mode 100644 index 000000000..6b9bffd87 --- /dev/null +++ 
b/frame/include/level0/bli_axpbys.h @@ -0,0 +1,481 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_AXPBYS_H +#define BLIS_AXPBYS_H + +// axpbys + +// Notes: +// - The first char encodes the type of a. +// - The second char encodes the type of x. +// - The third char encodes the type of b. +// - The fourth char encodes the type of y. 
+ + +// -- (axby) = (???s): every a/x/b type combination forwards the operands' real/imag parts to the same kernel, bli_saxpbyris -- + +#define bli_ssssaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dsssaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_csssaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zsssaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sdssaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ddssaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cdssaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zdssaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_scssaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dcssaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ccssaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zcssaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a),
bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_szssaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzssaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czssaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zzssaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_ssdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dsdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_csdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zsdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sddsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dddsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cddsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zddsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), 
bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_scdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dcdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ccdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zcdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_szdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zzdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_sscsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dscsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cscsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zscsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), 
bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sdcsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ddcsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cdcsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zdcsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sccsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dccsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cccsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zccsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_szcsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzcsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czcsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zzcsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), 
bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_sszsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dszsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cszsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zszsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sdzsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ddzsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cdzsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zdzsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sczsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dczsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cczsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zczsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), 
bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_szzsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzzsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czzsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zzzsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) + +// -- (axby) = (???d) ---------------------------------------------------------- + +#define bli_sssdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dssdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cssdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zssdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sdsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ddsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cdsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) 
+#define bli_zdsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_scsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dcsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ccsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zcsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_szsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dzsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_czsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zzsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) + +#define bli_ssddaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dsddaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_csddaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) 
+#define bli_zsddaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_sdddaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_ddddaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_cdddaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zdddaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_scddaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_dcddaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_ccddaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zcddaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_szddaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_dzddaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_czddaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zzddaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+
+#define bli_sscdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_dscdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_cscdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zscdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_sdcdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_ddcdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_cdcdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zdcdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_sccdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_dccdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_cccdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zccdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_szcdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_dzcdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_czcdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zzcdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+
+#define bli_sszdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_dszdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_cszdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zszdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_sdzdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_ddzdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_cdzdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zdzdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sczdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dczdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cczdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zczdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_szzdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dzzdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_czzdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zzzdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) + +#ifndef BLIS_ENABLE_C99_COMPLEX + +// -- (axby) = (???c) ---------------------------------------------------------- + +#define bli_ssscaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dsscaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_csscaxpbys( a, x, b, y ) 
bli_cssaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zsscaxpbys( a, x, b, y ) bli_cssaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sdscaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ddscaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cdscaxpbys( a, x, b, y ) bli_cssaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zdscaxpbys( a, x, b, y ) bli_cssaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_scscaxpbys( a, x, b, y ) bli_scsaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dcscaxpbys( a, x, b, y ) bli_scsaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ccscaxpbys( a, x, b, y ) bli_ccsaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zcscaxpbys( a, x, b, y ) bli_ccsaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_szscaxpbys( a, x, b, y ) bli_scsaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dzscaxpbys( a, x, b, y ) bli_scsaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_czscaxpbys( 
a, x, b, y ) bli_ccsaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zzscaxpbys( a, x, b, y ) bli_ccsaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) + +#define bli_ssdcaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dsdcaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_csdcaxpbys( a, x, b, y ) bli_cssaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zsdcaxpbys( a, x, b, y ) bli_cssaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sddcaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dddcaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cddcaxpbys( a, x, b, y ) bli_cssaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zddcaxpbys( a, x, b, y ) bli_cssaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_scdcaxpbys( a, x, b, y ) bli_scsaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dcdcaxpbys( a, x, b, y ) bli_scsaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define 
bli_ccdcaxpbys( a, x, b, y ) bli_ccsaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zcdcaxpbys( a, x, b, y ) bli_ccsaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_szdcaxpbys( a, x, b, y ) bli_scsaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dzdcaxpbys( a, x, b, y ) bli_scsaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_czdcaxpbys( a, x, b, y ) bli_ccsaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zzdcaxpbys( a, x, b, y ) bli_ccsaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) + +#define bli_ssccaxpbys( a, x, b, y ) bli_sscaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dsccaxpbys( a, x, b, y ) bli_sscaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_csccaxpbys( a, x, b, y ) bli_cscaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zsccaxpbys( a, x, b, y ) bli_cscaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sdccaxpbys( a, x, b, y ) bli_sscaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ddccaxpbys( a, x, b, y ) bli_sscaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), 
bli_cimag(y) ) +#define bli_cdccaxpbys( a, x, b, y ) bli_cscaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zdccaxpbys( a, x, b, y ) bli_cscaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_scccaxpbys( a, x, b, y ) bli_sccaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dcccaxpbys( a, x, b, y ) bli_sccaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ccccaxpbys( a, x, b, y ) bli_caxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zcccaxpbys( a, x, b, y ) bli_caxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_szccaxpbys( a, x, b, y ) bli_sccaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dzccaxpbys( a, x, b, y ) bli_sccaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_czccaxpbys( a, x, b, y ) bli_caxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zzccaxpbys( a, x, b, y ) bli_caxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) + +#define bli_sszcaxpbys( a, x, b, y ) bli_sscaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dszcaxpbys( a, x, b, y ) bli_sscaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), 
bli_creal(y), bli_cimag(y) ) +#define bli_cszcaxpbys( a, x, b, y ) bli_cscaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zszcaxpbys( a, x, b, y ) bli_cscaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sdzcaxpbys( a, x, b, y ) bli_sscaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ddzcaxpbys( a, x, b, y ) bli_sscaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cdzcaxpbys( a, x, b, y ) bli_cscaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zdzcaxpbys( a, x, b, y ) bli_cscaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sczcaxpbys( a, x, b, y ) bli_sccaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dczcaxpbys( a, x, b, y ) bli_sccaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cczcaxpbys( a, x, b, y ) bli_caxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zczcaxpbys( a, x, b, y ) bli_caxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_szzcaxpbys( a, x, b, y ) bli_sccaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dzzcaxpbys( a, x, b, y ) bli_sccaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), 
bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_czzcaxpbys( a, x, b, y ) bli_caxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zzzcaxpbys( a, x, b, y ) bli_caxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) + +// -- (axby) = (???z) ---------------------------------------------------------- + +#define bli_ssszaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dsszaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_csszaxpbys( a, x, b, y ) bli_zddaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zsszaxpbys( a, x, b, y ) bli_zddaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sdszaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ddszaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cdszaxpbys( a, x, b, y ) bli_zddaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zdszaxpbys( a, x, b, y ) bli_zddaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_scszaxpbys( a, x, b, y ) bli_dzdaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dcszaxpbys( a, x, b, 
y ) bli_dzdaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ccszaxpbys( a, x, b, y ) bli_zzdaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zcszaxpbys( a, x, b, y ) bli_zzdaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_szszaxpbys( a, x, b, y ) bli_dzdaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dzszaxpbys( a, x, b, y ) bli_dzdaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_czszaxpbys( a, x, b, y ) bli_zzdaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zzszaxpbys( a, x, b, y ) bli_zzdaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) + +#define bli_ssdzaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dsdzaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_csdzaxpbys( a, x, b, y ) bli_zddaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zsdzaxpbys( a, x, b, y ) bli_zddaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sddzaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define 
bli_dddzaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cddzaxpbys( a, x, b, y ) bli_zddaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zddzaxpbys( a, x, b, y ) bli_zddaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_scdzaxpbys( a, x, b, y ) bli_dzdaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dcdzaxpbys( a, x, b, y ) bli_dzdaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ccdzaxpbys( a, x, b, y ) bli_zzdaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zcdzaxpbys( a, x, b, y ) bli_zzdaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_szdzaxpbys( a, x, b, y ) bli_dzdaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dzdzaxpbys( a, x, b, y ) bli_dzdaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_czdzaxpbys( a, x, b, y ) bli_zzdaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zzdzaxpbys( a, x, b, y ) bli_zzdaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) + +#define bli_ssczaxpbys( a, x, b, y ) bli_ddzaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), 
bli_zimag(y) ) +#define bli_dsczaxpbys( a, x, b, y ) bli_ddzaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_csczaxpbys( a, x, b, y ) bli_zdzaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zsczaxpbys( a, x, b, y ) bli_zdzaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sdczaxpbys( a, x, b, y ) bli_ddzaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ddczaxpbys( a, x, b, y ) bli_ddzaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cdczaxpbys( a, x, b, y ) bli_zdzaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zdczaxpbys( a, x, b, y ) bli_zdzaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_scczaxpbys( a, x, b, y ) bli_dzzaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dcczaxpbys( a, x, b, y ) bli_dzzaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ccczaxpbys( a, x, b, y ) bli_zaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zcczaxpbys( a, x, b, y ) bli_zaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_szczaxpbys( a, x, b, y ) bli_dzzaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), 
bli_zreal(y), bli_zimag(y) ) +#define bli_dzczaxpbys( a, x, b, y ) bli_dzzaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_czczaxpbys( a, x, b, y ) bli_zaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zzczaxpbys( a, x, b, y ) bli_zaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) + +#define bli_sszzaxpbys( a, x, b, y ) bli_ddzaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dszzaxpbys( a, x, b, y ) bli_ddzaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cszzaxpbys( a, x, b, y ) bli_zdzaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zszzaxpbys( a, x, b, y ) bli_zdzaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sdzzaxpbys( a, x, b, y ) bli_ddzaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ddzzaxpbys( a, x, b, y ) bli_ddzaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cdzzaxpbys( a, x, b, y ) bli_zdzaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zdzzaxpbys( a, x, b, y ) bli_zdzaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sczzaxpbys( a, x, b, y ) bli_dzzaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), 
bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dczzaxpbys( a, x, b, y ) bli_dzzaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cczzaxpbys( a, x, b, y ) bli_zaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zczzaxpbys( a, x, b, y ) bli_zaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_szzzaxpbys( a, x, b, y ) bli_dzzaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dzzzaxpbys( a, x, b, y ) bli_dzzaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_czzzaxpbys( a, x, b, y ) bli_zaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zzzzaxpbys( a, x, b, y ) bli_zaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) + +#else // ifdef BLIS_ENABLE_C99_COMPLEX + +// -- (axby) = (???c) ---------------------------------------------------------- + +#define bli_ssscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define 
bli_dcscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ccscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zcscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_szscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dzscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_czscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zzscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } + +#define bli_ssdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sddcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dddcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cddcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zddcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dcdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ccdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zcdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_szdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dzdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_czdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zzdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } + +#define bli_ssccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define 
bli_ddccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dcccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ccccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zcccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_szccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dzccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_czccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zzccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } + +#define bli_sszcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dszcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cszcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zszcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sczcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dczcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cczcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zczcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_szzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dzzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_czzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zzzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } + +// -- (axby) = (???z) ---------------------------------------------------------- + 
+#define bli_ssszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dcszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ccszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zcszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_szszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dzszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_czszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zzszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } + +#define bli_ssdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sddzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dddzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cddzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zddzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dcdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ccdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zcdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define 
bli_szdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dzdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_czdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zzdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } + +#define bli_ssczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dcczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ccczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zcczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_szczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dzczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_czczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zzczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } + +#define bli_sszzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dszzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cszzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zszzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define 
bli_sczzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dczzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cczzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zczzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_szzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dzzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_czzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zzzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } + +#endif // BLIS_ENABLE_C99_COMPLEX + + +#define bli_saxpbys( a, x, b, y ) bli_ssssaxpbys( a, x, b, y ) +#define bli_daxpbys( a, x, b, y ) bli_ddddaxpbys( a, x, b, y ) +#define bli_caxpbys( a, x, b, y ) bli_ccccaxpbys( a, x, b, y ) +#define bli_zaxpbys( a, x, b, y ) bli_zzzzaxpbys( a, x, b, y ) + + +#endif + diff --git a/frame/include/level0/bli_xpbyjs.h b/frame/include/level0/bli_xpbyjs.h new file mode 100644 index 000000000..f59b42af3 --- /dev/null +++ b/frame/include/level0/bli_xpbyjs.h @@ -0,0 +1,192 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_XPBYJS_H +#define BLIS_XPBYJS_H + +// xpbyjs + +// Notes: +// - The first char encodes the type of x. +// - The second char encodes the type of b. +// - The third char encodes the type of y. + + +// -- (xby) = (??s) ------------------------------------------------------------ + +#define bli_sssxpbyjs( x, b, y ) bli_sxpbyjris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dssxpbyjs( x, b, y ) bli_sxpbyjris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cssxpbyjs( x, b, y ) bli_sxpbyjris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zssxpbyjs( x, b, y ) bli_sxpbyjris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_sdsxpbyjs( x, b, y ) bli_sxpbyjris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ddsxpbyjs( x, b, y ) bli_sxpbyjris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cdsxpbyjs( x, b, y ) bli_sxpbyjris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), 
bli_sreal(y), bli_simag(y) ) +#define bli_zdsxpbyjs( x, b, y ) bli_sxpbyjris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_scsxpbyjs( x, b, y ) bli_sxpbyjris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dcsxpbyjs( x, b, y ) bli_sxpbyjris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ccsxpbyjs( x, b, y ) bli_sxpbyjris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zcsxpbyjs( x, b, y ) bli_sxpbyjris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_szsxpbyjs( x, b, y ) bli_sxpbyjris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzsxpbyjs( x, b, y ) bli_sxpbyjris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czsxpbyjs( x, b, y ) bli_sxpbyjris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zzsxpbyjs( x, b, y ) bli_sxpbyjris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) + +// -- (xby) = (??d) ------------------------------------------------------------ + +#define bli_ssdxpbyjs( x, b, y ) bli_dxpbyjris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dsdxpbyjs( x, b, y ) bli_dxpbyjris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_csdxpbyjs( x, b, y ) bli_dxpbyjris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zsdxpbyjs( x, b, y ) bli_dxpbyjris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) + +#define bli_sddxpbyjs( x, b, y ) bli_dxpbyjris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) 
+#define bli_dddxpbyjs( x, b, y ) bli_dxpbyjris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_cddxpbyjs( x, b, y ) bli_dxpbyjris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zddxpbyjs( x, b, y ) bli_dxpbyjris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+
+#define bli_scdxpbyjs( x, b, y ) bli_dxpbyjris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_dcdxpbyjs( x, b, y ) bli_dxpbyjris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_ccdxpbyjs( x, b, y ) bli_dxpbyjris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zcdxpbyjs( x, b, y ) bli_dxpbyjris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+
+#define bli_szdxpbyjs( x, b, y ) bli_dxpbyjris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_dzdxpbyjs( x, b, y ) bli_dxpbyjris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_czdxpbyjs( x, b, y ) bli_dxpbyjris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zzdxpbyjs( x, b, y ) bli_dxpbyjris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
+
+#ifndef BLIS_ENABLE_C99_COMPLEX
+// C99 complex disabled: each macro splits x, b and y into (real, imag) parts and forwards them to a *xpbyjris kernel.
+// -- (xby) = (??c) ------------------------------------------------------------
+// y is complex, so a real b (s/d) goes through bli_scxpbyjris, which updates both yr and yi; a real x supplies bli_simag(x)/bli_dimag(x) as its imaginary part (presumably zero), exactly as the complex-b rows below do.
+#define bli_sscxpbyjs( x, b, y ) bli_scxpbyjris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) )
+#define bli_dscxpbyjs( x, b, y ) bli_scxpbyjris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) )
+#define bli_cscxpbyjs( x, b, y ) bli_scxpbyjris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) )
+#define bli_zscxpbyjs( x, b, y ) bli_scxpbyjris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) )
+
+#define bli_sdcxpbyjs( x, b, y ) bli_scxpbyjris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_ddcxpbyjs( x, b, y ) bli_scxpbyjris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_cdcxpbyjs( x, b, y ) bli_scxpbyjris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_zdcxpbyjs( x, b, y ) bli_scxpbyjris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) )
+
+#define bli_sccxpbyjs( x, b, y ) bli_cxpbyjris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_dccxpbyjs( x, b, y ) bli_cxpbyjris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_cccxpbyjs( x, b, y ) bli_cxpbyjris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_zccxpbyjs( x, b, y ) bli_cxpbyjris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) )
+
+#define bli_szcxpbyjs( x, b, y ) bli_cxpbyjris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_dzcxpbyjs( x, b, y ) bli_cxpbyjris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_czcxpbyjs( x, b, y ) bli_cxpbyjris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_zzcxpbyjs( x, b, y ) bli_cxpbyjris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) )
+
+// -- (xby) = (??z) ------------------------------------------------------------
+// Same rule in double precision: a real b uses bli_dzxpbyjris so that imag(y) is scaled by b even when x is real.
+#define bli_sszxpbyjs( x, b, y ) bli_dzxpbyjris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_dszxpbyjs( x, b, y ) bli_dzxpbyjris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_cszxpbyjs( x, b, y ) bli_dzxpbyjris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_zszxpbyjs( x, b, y ) bli_dzxpbyjris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) )
+
+#define bli_sdzxpbyjs( x, b, y ) bli_dzxpbyjris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_ddzxpbyjs( x, b, y ) bli_dzxpbyjris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_cdzxpbyjs( x, b, y ) bli_dzxpbyjris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_zdzxpbyjs( x, b, y ) bli_dzxpbyjris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) )
+
+#define bli_sczxpbyjs( x, b, y ) bli_zxpbyjris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_dczxpbyjs( x, b, y ) bli_zxpbyjris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_cczxpbyjs( x, b, y ) bli_zxpbyjris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_zczxpbyjs( x, b, y ) bli_zxpbyjris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) )
+
+#define bli_szzxpbyjs( x, b, y ) bli_zxpbyjris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_dzzxpbyjs( x, b, y ) bli_zxpbyjris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_czzxpbyjs( x, b, y ) bli_zxpbyjris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_zzzxpbyjs( x, b, y ) bli_zxpbyjris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) )
+
+#else // ifdef BLIS_ENABLE_C99_COMPLEX
+// C99 complex enabled: y is updated in one native expression, y = conj(x) + b * y, so both parts of y are scaled by b.
+// -- (xby) = (??c) ------------------------------------------------------------
+// The first char (type of x) picks the conjugation: s/d use x as-is, c uses conjf(), z uses conj() (both from <complex.h>).
+#define bli_sscxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_dscxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_cscxpbyjs( x, b, y ) { (y) = conjf(x) + (b) * (y); }
+#define bli_zscxpbyjs( x, b, y ) { (y) = conj(x) + (b) * (y); }
+
+#define bli_sdcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_ddcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_cdcxpbyjs( x, b, y ) { (y) = conjf(x) + (b) * (y); }
+#define bli_zdcxpbyjs( x, b, y ) { (y) = conj(x) + (b) * (y); }
+
+#define bli_sccxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_dccxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_cccxpbyjs( x, b, y ) { (y) = conjf(x) + (b) * (y); }
+#define bli_zccxpbyjs( x, b, y ) { (y) = conj(x) + (b) * (y); }
+
+#define bli_szcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_dzcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_czcxpbyjs( x, b, y ) { (y) = conjf(x) + (b) * (y); }
+#define bli_zzcxpbyjs( x, b, y ) { (y) = conj(x) + (b) * (y); }
+
+// -- (xby) = (??z) ------------------------------------------------------------
+// Same expressions as the ??c group above; only the type encoded for y differs.
+#define bli_sszxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_dszxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_cszxpbyjs( x, b, y ) { (y) = conjf(x) + (b) * (y); }
+#define bli_zszxpbyjs( x, b, y ) { (y) = conj(x) + (b) * (y); }
+
+#define bli_sdzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_ddzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_cdzxpbyjs( x, b, y ) { (y) = conjf(x) + (b) * (y); }
+#define bli_zdzxpbyjs( x, b, y ) { (y) = conj(x) + (b) * (y); }
+
+#define bli_sczxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_dczxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_cczxpbyjs( x, b, y ) { (y) = conjf(x) + (b) * (y); }
+#define bli_zczxpbyjs( x, b, y ) { (y) = conj(x) + (b) * (y); }
+
+#define bli_szzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } +#define bli_dzzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } +#define bli_czzxpbyjs( x, b, y ) { (y) = conjf(x) + (b) * (y); } +#define bli_zzzxpbyjs( x, b, y ) { (y) = conj(x) + (b) * (y); } + +#endif // BLIS_ENABLE_C99_COMPLEX + + +#define bli_sxpbyjs( x, b, y ) bli_sssxpbyjs( x, b, y ) +#define bli_dxpbyjs( x, b, y ) bli_dddxpbyjs( x, b, y ) +#define bli_cxpbyjs( x, b, y ) bli_cccxpbyjs( x, b, y ) +#define bli_zxpbyjs( x, b, y ) bli_zzzxpbyjs( x, b, y ) + + +#endif + diff --git a/frame/include/level0/ri/bli_axpbyjris.h b/frame/include/level0/ri/bli_axpbyjris.h new file mode 100644 index 000000000..24512d223 --- /dev/null +++ b/frame/include/level0/ri/bli_axpbyjris.h @@ -0,0 +1,163 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_AXPBYJRIS_H +#define BLIS_AXPBYJRIS_H + +// axpbyjris + +#define bli_saxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + (yr) = (ar) * (xr) + (br) * (yr); \ +} + +#define bli_daxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + (yr) = (ar) * (xr) + (br) * (yr); \ +} + +#define bli_caxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (ai) * (xi) + (br) * (yr) - (bi) * (yi); \ + float yt_i = (ai) * (xr) - (ar) * (xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_sccaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + float yt_i = -(ar) * (xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_ccsaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (ai) * (xi) + (br) * (yr); \ + float yt_i = (ai) * (xr) - (ar) * (xi) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_cscaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + float yt_i = (ai) * (xr) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_sscaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + float yt_i = (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_cssaxpbyjris( ar, ai, xr, xi, br, bi, 
yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr); \ + float yt_i = (ai) * (xr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_scsaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr); \ + float yt_i = -(ar) * (xi) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_zaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (ai) * (xi) + (br) * (yr) - (bi) * (yi); \ + double yt_i = (ai) * (xr) - (ar) * (xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_dzzaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + double yt_i = -(ar) * (xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_zzdaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (ai) * (xi) + (br) * (yr); \ + double yt_i = (ai) * (xr) - (ar) * (xi) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_zdzaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + double yt_i = (ai) * (xr) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_ddzaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + double yt_i = (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_zddaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr); \ + double yt_i = (ai) * (xr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_dzdaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr); \ + double yt_i = -(ar) * (xi) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#endif + diff --git a/frame/include/level0/ri/bli_axpbyris.h b/frame/include/level0/ri/bli_axpbyris.h index 2303db350..fbc49f0fa 
100644 --- a/frame/include/level0/ri/bli_axpbyris.h +++ b/frame/include/level0/ri/bli_axpbyris.h @@ -49,10 +49,58 @@ #define bli_caxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ { \ - float yt_r = (ar) * (xr) - (ai) * (xi) + (br) * (yr) - (bi) * (yi); \ - float yt_i = (ai) * (xr) + (ar) * (xi) + (bi) * (yr) + (br) * (yi); \ - (yr) = yt_r; \ - (yi) = yt_i; \ + float yt_r = (ar) * (xr) - (ai) * (xi) + (br) * (yr) - (bi) * (yi); \ + float yt_i = (ai) * (xr) + (ar) * (xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_sccaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + float yt_i = (ar) * (xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_ccsaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) - (ai) * (xi) + (br) * (yr); \ + float yt_i = (ai) * (xr) + (ar) * (xi) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_cscaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + float yt_i = (ai) * (xr) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_sscaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + float yt_i = (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_cssaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr); \ + float yt_i = (ai) * (xr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_scsaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr); \ + float yt_i = (ar) * (xi) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ } #define bli_zaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ @@ -63,5 +111,53 @@ (yi) = yt_i; \ } +#define bli_dzzaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr) - 
(bi) * (yi); \ + double yt_i = (ar) * (xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_zzdaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) - (ai) * (xi) + (br) * (yr); \ + double yt_i = (ai) * (xr) + (ar) * (xi) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_zdzaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + double yt_i = (ai) * (xr) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_ddzaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + double yt_i = (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_zddaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr); \ + double yt_i = (ai) * (xr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_dzdaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr); \ + double yt_i = (ar) * (xi) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + #endif diff --git a/frame/include/level0/ri/bli_xpbyjris.h b/frame/include/level0/ri/bli_xpbyjris.h new file mode 100644 index 000000000..fe3cf6767 --- /dev/null +++ b/frame/include/level0/ri/bli_xpbyjris.h @@ -0,0 +1,79 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef BLIS_XPBYJRIS_H +#define BLIS_XPBYJRIS_H + +// xpbyjris + +#define bli_sxpbyjris( xr, xi, br, bi, yr, yi ) \ +{ \ + (yr) = (xr) + (br) * (yr); \ +} + +#define bli_dxpbyjris( xr, xi, br, bi, yr, yi ) \ +{ \ + (yr) = (xr) + (br) * (yr); \ +} + +#define bli_cxpbyjris( xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (xr) + (br) * (yr) - (bi) * (yi); \ + float yt_i = -(xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_zxpbyjris( xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (xr) + (br) * (yr) - (bi) * (yi); \ + double yt_i = -(xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_scxpbyjris( xr, xi, br, bi, yr, yi ) \ +{ \ + (yr) = (xr) + (br) * (yr); \ + (yi) = -(xi) + (br) * (yi); \ +} + +#define bli_dzxpbyjris( xr, xi, br, bi, yr, yi ) \ +{ \ + (yr) = (xr) + (br) * (yr); \ + (yi) = -(xi) + (br) * (yi); \ +} + +#endif + diff --git a/testsuite/input.operations b/testsuite/input.operations index 87b1090b0..058721632 100644 --- a/testsuite/input.operations +++ b/testsuite/input.operations @@ -107,6 +107,11 @@ -1 # dimensions: m ? # parameters: conjx +1 # axpbyv +1 # test sequential front-end +-1 # dimensions: m +? # parameters: conjx + 1 # axpyv 1 # test sequential front-end -1 # dimensions: m @@ -150,6 +155,11 @@ -1 # dimensions: m ? # parameters: conjx +1 # xpbyv +1 # test sequential front-end +-1 # dimensions: m +? # parameters: conjx + # --- Level-1m ------------------------------------------------------------- diff --git a/testsuite/src/test_axpbyv.c b/testsuite/src/test_axpbyv.c new file mode 100644 index 000000000..d9e3c18a0 --- /dev/null +++ b/testsuite/src/test_axpbyv.c @@ -0,0 +1,282 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" +#include "test_libblis.h" + + +// Static variables. +static char* op_str = "axpbyv"; +static char* o_types = "vv"; // x y +static char* p_types = "c"; // conjx +static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s + { 1e-04, 1e-05 }, // warn, pass for c + { 1e-13, 1e-14 }, // warn, pass for d + { 1e-13, 1e-14 } }; // warn, pass for z + +// Local prototypes. 
+void libblis_test_axpbyv_deps( test_params_t* params, + test_op_t* op ); + +void libblis_test_axpbyv_experiment( test_params_t* params, + test_op_t* op, + iface_t iface, + num_t datatype, + char* pc_str, + char* sc_str, + unsigned int p_cur, + double* perf, + double* resid ); + +void libblis_test_axpbyv_impl( iface_t iface, + obj_t* alpha, + obj_t* x, + obj_t* beta, + obj_t* y ); + +void libblis_test_axpbyv_check( obj_t* alpha, + obj_t* x, + obj_t* beta, + obj_t* y, + obj_t* y_orig, + double* resid ); + + + +void libblis_test_axpbyv_deps( test_params_t* params, test_op_t* op ) +{ + libblis_test_randv( params, &(op->ops->randv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); + libblis_test_addv( params, &(op->ops->addv) ); + libblis_test_axpyv( params, &(op->ops->axpyv) ); + libblis_test_subv( params, &(op->ops->subv) ); + libblis_test_copyv( params, &(op->ops->copyv) ); + libblis_test_scalv( params, &(op->ops->scalv) ); + libblis_test_scal2v( params, &(op->ops->scal2v) ); + libblis_test_xpbyv( params, &(op->ops->xpbyv) ); +} + + + +void libblis_test_axpbyv( test_params_t* params, test_op_t* op ) +{ + + // Return early if this test has already been done. + if ( op->test_done == TRUE ) return; + + // Return early if operation is disabled. + if ( op->op_switch == DISABLE_ALL || + op->ops->l1v_over == DISABLE_ALL ) return; + + // Call dependencies first. + if ( TRUE ) libblis_test_axpbyv_deps( params, op ); + + // Execute the test driver for each implementation requested. 
+ if ( op->front_seq == ENABLE ) + { + libblis_test_op_driver( params, + op, + BLIS_TEST_SEQ_FRONT_END, + op_str, + p_types, + o_types, + thresh, + libblis_test_axpbyv_experiment ); + } +} + + + +void libblis_test_axpbyv_experiment( test_params_t* params, + test_op_t* op, + iface_t iface, + num_t datatype, + char* pc_str, + char* sc_str, + unsigned int p_cur, + double* perf, + double* resid ) +{ + unsigned int n_repeats = params->n_repeats; + unsigned int i; + + double time_min = 1e9; + double time; + + dim_t m; + + conj_t conjx; + + obj_t alpha, beta, x, y; + obj_t y_save; + + + // Map the dimension specifier to an actual dimension. + m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); + + // Map parameter characters to BLIS constants. + bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); + + // Create test scalars. + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); + + // Create test operands (vectors and/or matrices). + libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); + libblis_test_vobj_create( params, datatype, sc_str[1], m, &y ); + libblis_test_vobj_create( params, datatype, sc_str[1], m, &y_save ); + + // Set alpha. + //bli_setsc( sqrt(2.0)/2.0, sqrt(2.0)/2.0, &alpha ); + //bli_copysc( &BLIS_TWO, &alpha ); + if ( bli_obj_is_real( y ) ) + bli_setsc( -2.0, 0.0, &alpha ); + else + bli_setsc( 0.0, -2.0, &alpha ); + + bli_setsc( -1.0, 0.0, &beta ); + + // Randomize x and y, and save y. + bli_randv( &x ); + bli_randv( &y ); + bli_copyv( &y, &y_save ); + + // Apply the parameters. + bli_obj_set_conj( conjx, x ); + + // Repeat the experiment n_repeats times and record results. + for ( i = 0; i < n_repeats; ++i ) + { + bli_copyv( &y_save, &y ); + + time = bli_clock(); + + libblis_test_axpbyv_impl( iface, &alpha, &x, &beta, &y ); + + time_min = bli_clock_min_diff( time_min, time ); + } + + // Estimate the performance of the best experiment repeat. 
+ *perf = ( 3.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) *perf *= 14.0 / 3.0; + + // Perform checks. + libblis_test_axpbyv_check( &alpha, &x, &beta, &y, &y_save, resid ); + + // Zero out performance and residual if output vector is empty. + libblis_test_check_empty_problem( &y, perf, resid ); + + // Free the test objects. + bli_obj_free( &x ); + bli_obj_free( &y ); + bli_obj_free( &y_save ); +} + + + +void libblis_test_axpbyv_impl( iface_t iface, + obj_t* alpha, + obj_t* x, + obj_t* beta, + obj_t* y ) +{ + switch ( iface ) + { + case BLIS_TEST_SEQ_FRONT_END: + bli_axpbyv( alpha, x, beta, y ); + break; + + default: + libblis_test_printf_error( "Invalid interface type.\n" ); + } +} + + + +void libblis_test_axpbyv_check( obj_t* alpha, + obj_t* x, + obj_t* beta, + obj_t* y, + obj_t* y_orig, + double* resid ) +{ + num_t dt = bli_obj_datatype( *y ); + num_t dt_real = bli_obj_datatype_proj_to_real( *y ); + + dim_t m = bli_obj_vector_dim( *y ); + + obj_t x_temp, y_temp; + obj_t norm; + + double junk; + + // + // Pre-conditions: + // - x is randomized. + // - y_orig is randomized. + // Note: + // - alpha should have a non-zero imaginary component in the complex + // cases in order to more fully exercise the implementation. + // + // Under these conditions, we assume that the implementation for + // + // y := beta * y_orig + alpha * conjx(x) + // + // is functioning correctly if + // + // normf( y - ( beta * y_orig + alpha * conjx(x) ) ) + // + // is negligible. 
+ // + + bli_obj_scalar_init_detached( dt_real, &norm ); + + bli_obj_create( dt, m, 1, 0, 0, &x_temp ); + bli_obj_create( dt, m, 1, 0, 0, &y_temp ); + + bli_copyv( x, &x_temp ); + bli_copyv( y_orig, &y_temp ); + + bli_scalv( alpha, &x_temp ); + bli_scalv( beta, &y_temp ); + bli_addv( &x_temp, &y_temp ); + + bli_subv( &y_temp, y ); + bli_normfv( y, &norm ); + bli_getsc( &norm, resid, &junk ); + + bli_obj_free( &x_temp ); + bli_obj_free( &y_temp ); +} + diff --git a/testsuite/src/test_axpbyv.h b/testsuite/src/test_axpbyv.h new file mode 100644 index 000000000..d0ff10644 --- /dev/null +++ b/testsuite/src/test_axpbyv.h @@ -0,0 +1,36 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void libblis_test_axpbyv( test_params_t* params, test_op_t* op ); + diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index d07eade90..d48516a12 100644 --- a/testsuite/src/test_libblis.c +++ b/testsuite/src/test_libblis.c @@ -113,7 +113,8 @@ void libblis_test_utility_ops( test_params_t* params, test_ops_t* ops ) void libblis_test_level1v_ops( test_params_t* params, test_ops_t* ops ) { libblis_test_addv( params, &(ops->addv) ); - libblis_test_axpyv( params, &(ops->axpyv) ); + libblis_test_axpbyv( params, &(ops->axpbyv) ); + libblis_test_axpyv( params, &(ops->axpyv) ); libblis_test_copyv( params, &(ops->copyv) ); libblis_test_dotv( params, &(ops->dotv) ); libblis_test_dotxv( params, &(ops->dotxv) ); @@ -122,6 +123,7 @@ void libblis_test_level1v_ops( test_params_t* params, test_ops_t* ops ) libblis_test_scal2v( params, &(ops->scal2v) ); libblis_test_setv( params, &(ops->setv) ); libblis_test_subv( params, &(ops->subv) ); + libblis_test_xpbyv( params, &(ops->xpbyv) ); } @@ -220,6 +222,7 @@ void libblis_test_read_ops_file( char* input_filename, test_ops_t* ops ) // Level-1v libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->addv) ); + libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->axpbyv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->axpyv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, 
BLIS_TEST_DIMS_M, 1, &(ops->copyv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 2, &(ops->dotv) ); @@ -229,6 +232,7 @@ void libblis_test_read_ops_file( char* input_filename, test_ops_t* ops ) libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->scal2v) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 0, &(ops->setv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->subv) ); + libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->xpbyv) ); // Level-1m libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 1, &(ops->addm) ); diff --git a/testsuite/src/test_libblis.h b/testsuite/src/test_libblis.h index 0725dd400..8a84a2d9b 100644 --- a/testsuite/src/test_libblis.h +++ b/testsuite/src/test_libblis.h @@ -204,7 +204,8 @@ typedef struct test_ops_s // level-1v test_op_t addv; - test_op_t axpyv; + test_op_t axpbyv; + test_op_t axpyv; test_op_t copyv; test_op_t dotv; test_op_t dotxv; @@ -213,6 +214,7 @@ typedef struct test_ops_s test_op_t scal2v; test_op_t setv; test_op_t subv; + test_op_t xpbyv; // level-1m test_op_t addm; @@ -415,6 +417,7 @@ void libblis_test_check_empty_problem( obj_t* c, double* perf, double* resid ); // Level-1v #include "test_addv.h" +#include "test_axpbyv.h" #include "test_axpyv.h" #include "test_copyv.h" #include "test_dotv.h" @@ -424,6 +427,7 @@ void libblis_test_check_empty_problem( obj_t* c, double* perf, double* resid ); #include "test_scal2v.h" #include "test_setv.h" #include "test_subv.h" +#include "test_xpbyv.h" // Level-1m #include "test_addm.h" diff --git a/testsuite/src/test_xpbyv.c b/testsuite/src/test_xpbyv.c new file mode 100644 index 000000000..a6610905b --- /dev/null +++ b/testsuite/src/test_xpbyv.c @@ -0,0 +1,268 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" +#include "test_libblis.h" + + +// Static variables. +static char* op_str = "xpbyv"; +static char* o_types = "vv"; // x y +static char* p_types = "c"; // conjx +static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s + { 1e-04, 1e-05 }, // warn, pass for c + { 1e-13, 1e-14 }, // warn, pass for d + { 1e-13, 1e-14 } }; // warn, pass for z + +// Local prototypes. 
+void libblis_test_xpbyv_deps( test_params_t* params, + test_op_t* op ); + +void libblis_test_xpbyv_experiment( test_params_t* params, + test_op_t* op, + iface_t iface, + num_t datatype, + char* pc_str, + char* sc_str, + unsigned int p_cur, + double* perf, + double* resid ); + +void libblis_test_xpbyv_impl( iface_t iface, + obj_t* x, + obj_t* beta, + obj_t* y ); + +void libblis_test_xpbyv_check( obj_t* x, + obj_t* beta, + obj_t* y, + obj_t* y_orig, + double* resid ); + + + +void libblis_test_xpbyv_deps( test_params_t* params, test_op_t* op ) +{ + libblis_test_randv( params, &(op->ops->randv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); + libblis_test_addv( params, &(op->ops->addv) ); + libblis_test_subv( params, &(op->ops->subv) ); + libblis_test_copyv( params, &(op->ops->copyv) ); +} + + + +void libblis_test_xpbyv( test_params_t* params, test_op_t* op ) +{ + + // Return early if this test has already been done. + if ( op->test_done == TRUE ) return; + + // Return early if operation is disabled. + if ( op->op_switch == DISABLE_ALL || + op->ops->l1v_over == DISABLE_ALL ) return; + + // Call dependencies first. + if ( TRUE ) libblis_test_xpbyv_deps( params, op ); + + // Execute the test driver for each implementation requested. + if ( op->front_seq == ENABLE ) + { + libblis_test_op_driver( params, + op, + BLIS_TEST_SEQ_FRONT_END, + op_str, + p_types, + o_types, + thresh, + libblis_test_xpbyv_experiment ); + } +} + + + +void libblis_test_xpbyv_experiment( test_params_t* params, + test_op_t* op, + iface_t iface, + num_t datatype, + char* pc_str, + char* sc_str, + unsigned int p_cur, + double* perf, + double* resid ) +{ + unsigned int n_repeats = params->n_repeats; + unsigned int i; + + double time_min = 1e9; + double time; + + dim_t m; + + conj_t conjx; + + obj_t beta, x, y; + obj_t y_save; + + + // Map the dimension specifier to an actual dimension. 
+ m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); + + // Map parameter characters to BLIS constants. + bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); + + // Create test scalars. + bli_obj_scalar_init_detached( datatype, &beta ); + + // Create test operands (vectors and/or matrices). + libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); + libblis_test_vobj_create( params, datatype, sc_str[1], m, &y ); + libblis_test_vobj_create( params, datatype, sc_str[1], m, &y_save ); + + // Set beta. + if ( bli_obj_is_real( y ) ) + bli_setsc( -2.0, 0.0, &beta ); + else + bli_setsc( 0.0, -2.0, &beta ); + + // Randomize x and y, and save y. + bli_randv( &x ); + bli_randv( &y ); + bli_copyv( &y, &y_save ); + + // Apply the parameters. + bli_obj_set_conj( conjx, x ); + + // Repeat the experiment n_repeats times and record results. + for ( i = 0; i < n_repeats; ++i ) + { + bli_copyv( &y_save, &y ); + + time = bli_clock(); + + libblis_test_xpbyv_impl( iface, &x, &beta, &y ); + + time_min = bli_clock_min_diff( time_min, time ); + } + + // Estimate the performance of the best experiment repeat. + *perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) *perf *= 4.0; + + // Perform checks. + libblis_test_xpbyv_check( &x, &beta, &y, &y_save, resid ); + + // Zero out performance and residual if output vector is empty. + libblis_test_check_empty_problem( &y, perf, resid ); + + // Free the test objects. 
+ bli_obj_free( &x ); + bli_obj_free( &y ); + bli_obj_free( &y_save ); +} + + + +void libblis_test_xpbyv_impl( iface_t iface, + obj_t* x, + obj_t* beta, + obj_t* y ) +{ + switch ( iface ) + { + case BLIS_TEST_SEQ_FRONT_END: + bli_xpbyv( x, beta, y ); + break; + + default: + libblis_test_printf_error( "Invalid interface type.\n" ); + } +} + + + +void libblis_test_xpbyv_check( obj_t* x, + obj_t* beta, + obj_t* y, + obj_t* y_orig, + double* resid ) +{ + num_t dt = bli_obj_datatype( *y ); + num_t dt_real = bli_obj_datatype_proj_to_real( *y ); + + dim_t m = bli_obj_vector_dim( *y ); + + obj_t x_temp, y_temp; + obj_t norm; + + double junk; + + // + // Pre-conditions: + // - x is randomized. + // - y_orig is randomized. + // Note: + // - beta should have a non-zero imaginary component in the complex + // cases in order to more fully exercise the implementation. + // + // Under these conditions, we assume that the implementation for + // + // y := beta * y_orig + conjx(x) + // + // is functioning correctly if + // + // normf( y - ( beta * y_orig + conjx(x) ) ) + // + // is negligible. + // + + bli_obj_scalar_init_detached( dt_real, &norm ); + + bli_obj_create( dt, m, 1, 0, 0, &x_temp ); + bli_obj_create( dt, m, 1, 0, 0, &y_temp ); + + bli_copyv( x, &x_temp ); + bli_copyv( y_orig, &y_temp ); + + bli_scalv( beta, &y_temp ); + bli_addv( &x_temp, &y_temp ); + + bli_subv( &y_temp, y ); + bli_normfv( y, &norm ); + bli_getsc( &norm, resid, &junk ); + + bli_obj_free( &x_temp ); + bli_obj_free( &y_temp ); +} + diff --git a/testsuite/src/test_xpbyv.h b/testsuite/src/test_xpbyv.h new file mode 100644 index 000000000..79b01c18d --- /dev/null +++ b/testsuite/src/test_xpbyv.h @@ -0,0 +1,36 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void libblis_test_xpbyv( test_params_t* params, test_op_t* op ); + From bbb8569b2a08c3bcd631d5a05eb389d01d94ac07 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Wed, 27 Apr 2016 14:13:46 -0500 Subject: [PATCH 09/14] Use 'restrict' in all kernel APIs; wspace changes. 
Details: - Updated level-1v, level-1f kernel function types (bli_l1?_ft.h) and generic kernel prototypes (bli_l1?_ker.h) to use 'restrict' for all numerical operand pointers (ie: all pointers except the cntx_t). - Updated level-1f reference kernel definitions to use 'restrict' for all numerical operand pointers. (Level-1v reference kernel definitions were already updated in bdbda6e.) - Rewrote the level-1v and level-1f reference kernel prototypes in bli_l1v_ref.h and bli_l1f_ref.h, respectively, to simply #include bli_l1v_ker.h and bli_l1f_ker.h with redefined function base names (as was already being done for the level-3 micro-kernel prototypes in bli_l3_ref.h), rather than duplicate the signatures from the _ker.h files. - Added definitions to frame/include/bli_kernel_prototypes.h for axpbyv and xpbyv, which were probably meant for inclusion in bdbda6e. - Converted a number of instances of four spaces, as introduced in bdbda6e, to tabs. --- frame/1/bli_l1v_check.c | 198 ++++++------- frame/1/bli_l1v_check.h | 35 +-- frame/1/bli_l1v_cntx.c | 80 ++--- frame/1/bli_l1v_ft.h | 106 ++++--- frame/1/bli_l1v_ker.h | 144 ++++----- frame/1/bli_l1v_oapi.c | 223 +++++++------- frame/1/bli_l1v_oapi.h | 36 +-- frame/1/bli_l1v_tapi.c | 148 +++++----- frame/1/bli_l1v_tapi.h | 2 +- frame/1/kernels/bli_addv_ref.c | 74 ++--- frame/1/kernels/bli_axpbyv_ref.c | 342 +++++++++++----------- frame/1/kernels/bli_axpyv_ref.c | 74 ++--- frame/1/kernels/bli_copyv_ref.c | 74 ++--- frame/1/kernels/bli_dotv_ref.c | 76 ++--- frame/1/kernels/bli_dotxv_ref.c | 76 ++--- frame/1/kernels/bli_invertv_ref.c | 36 +-- frame/1/kernels/bli_l1v_ref.h | 159 ++-------- frame/1/kernels/bli_scal2v_ref.c | 106 +++---- frame/1/kernels/bli_scalv_ref.c | 38 +-- frame/1/kernels/bli_setv_ref.c | 70 ++--- frame/1/kernels/bli_subv_ref.c | 70 ++--- frame/1/kernels/bli_swapv_ref.c | 38 +-- frame/1/kernels/bli_xpbyv_ref.c | 102 +++---- frame/1f/bli_l1f_ft.h | 52 ++-- frame/1f/bli_l1f_ker.h | 104 +++---- 
frame/1f/kernels/bli_axpy2v_ref.c | 18 +- frame/1f/kernels/bli_axpyf_ref.c | 18 +- frame/1f/kernels/bli_dotaxpyv_ref.c | 20 +- frame/1f/kernels/bli_dotxaxpyf_ref_var1.c | 28 +- frame/1f/kernels/bli_dotxaxpyf_ref_var2.c | 28 +- frame/1f/kernels/bli_dotxf_ref.c | 20 +- frame/1f/kernels/bli_l1f_ref.h | 131 +-------- frame/include/bli_kernel_pre_macro_defs.h | 8 +- frame/include/bli_kernel_prototypes.h | 10 + frame/include/bli_type_defs.h | 4 +- testsuite/src/test_axpbyv.c | 18 +- testsuite/src/test_libblis.c | 10 +- testsuite/src/test_libblis.h | 8 +- testsuite/src/test_xpbyv.c | 4 +- 39 files changed, 1292 insertions(+), 1496 deletions(-) diff --git a/frame/1/bli_l1v_check.c b/frame/1/bli_l1v_check.c index 737fbaceb..b998a65fb 100644 --- a/frame/1/bli_l1v_check.c +++ b/frame/1/bli_l1v_check.c @@ -62,31 +62,15 @@ GENFRONT( swapv ) void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ - obj_t* x, \ - obj_t* y \ - ) \ -{ \ - bli_l1v_axy_check( alpha, x, y ); \ -} - -GENFRONT( axpyv ) -GENFRONT( scal2v ) - - -#undef GENFRONT -#define GENFRONT( opname ) \ -\ -void PASTEMAC(opname,_check) \ - ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ ) \ { \ - bli_l1v_xby_check( x, beta, y ); \ + bli_l1v_axby_check( alpha, x, beta, y ); \ } -GENFRONT( xpbyv ) +GENFRONT( axpbyv ) #undef GENFRONT @@ -96,14 +80,14 @@ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x, \ - obj_t* beta, \ obj_t* y \ ) \ { \ - bli_l1v_axby_check( alpha, x, beta, y ); \ + bli_l1v_axy_check( alpha, x, y ); \ } -GENFRONT( axpbyv ) +GENFRONT( axpyv ) +GENFRONT( scal2v ) #undef GENFRONT @@ -170,6 +154,22 @@ GENFRONT( scalv ) GENFRONT( setv ) +#undef GENFRONT +#define GENFRONT( opname ) \ +\ +void PASTEMAC(opname,_check) \ + ( \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + ) \ +{ \ + bli_l1v_xby_check( x, beta, y ); \ +} + +GENFRONT( xpbyv ) + + // ----------------------------------------------------------------------------- void bli_l1v_xy_check @@ -215,43 +215,43 @@ void bli_l1v_axy_check obj_t* y ) { - err_t 
e_val; + err_t e_val; - // Check object datatypes. + // Check object datatypes. - e_val = bli_check_noninteger_object( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_noninteger_object( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_floating_object( x ); - bli_check_error_code( e_val ); + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); - e_val = bli_check_floating_object( y ); - bli_check_error_code( e_val ); + e_val = bli_check_floating_object( y ); + bli_check_error_code( e_val ); - // Check object dimensions. + // Check object dimensions. - e_val = bli_check_scalar_object( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_scalar_object( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_vector_object( x ); - bli_check_error_code( e_val ); + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); - e_val = bli_check_vector_object( y ); - bli_check_error_code( e_val ); + e_val = bli_check_vector_object( y ); + bli_check_error_code( e_val ); - e_val = bli_check_equal_vector_lengths( x, y ); - bli_check_error_code( e_val ); + e_val = bli_check_equal_vector_lengths( x, y ); + bli_check_error_code( e_val ); - // Check object buffers (for non-NULLness). + // Check object buffers (for non-NULLness). - e_val = bli_check_object_buffer( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( x ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( x ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( y ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( y ); + bli_check_error_code( e_val ); } void bli_l1v_xby_check @@ -261,43 +261,43 @@ void bli_l1v_xby_check obj_t* y ) { - err_t e_val; + err_t e_val; - // Check object datatypes. + // Check object datatypes. 
- e_val = bli_check_noninteger_object( beta ); - bli_check_error_code( e_val ); + e_val = bli_check_noninteger_object( beta ); + bli_check_error_code( e_val ); - e_val = bli_check_floating_object( x ); - bli_check_error_code( e_val ); + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); - e_val = bli_check_floating_object( y ); - bli_check_error_code( e_val ); + e_val = bli_check_floating_object( y ); + bli_check_error_code( e_val ); - // Check object dimensions. + // Check object dimensions. - e_val = bli_check_scalar_object( beta ); - bli_check_error_code( e_val ); + e_val = bli_check_scalar_object( beta ); + bli_check_error_code( e_val ); - e_val = bli_check_vector_object( x ); - bli_check_error_code( e_val ); + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); - e_val = bli_check_vector_object( y ); - bli_check_error_code( e_val ); + e_val = bli_check_vector_object( y ); + bli_check_error_code( e_val ); - e_val = bli_check_equal_vector_lengths( x, y ); - bli_check_error_code( e_val ); + e_val = bli_check_equal_vector_lengths( x, y ); + bli_check_error_code( e_val ); - // Check object buffers (for non-NULLness). + // Check object buffers (for non-NULLness). - e_val = bli_check_object_buffer( beta ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( beta ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( x ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( x ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( y ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( y ); + bli_check_error_code( e_val ); } void bli_l1v_axby_check @@ -308,52 +308,52 @@ void bli_l1v_axby_check obj_t* y ) { - err_t e_val; + err_t e_val; - // Check object datatypes. + // Check object datatypes. 
- e_val = bli_check_noninteger_object( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_noninteger_object( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_noninteger_object( beta ); - bli_check_error_code( e_val ); + e_val = bli_check_noninteger_object( beta ); + bli_check_error_code( e_val ); - e_val = bli_check_floating_object( x ); - bli_check_error_code( e_val ); + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); - e_val = bli_check_floating_object( y ); - bli_check_error_code( e_val ); + e_val = bli_check_floating_object( y ); + bli_check_error_code( e_val ); - // Check object dimensions. + // Check object dimensions. - e_val = bli_check_scalar_object( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_scalar_object( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_scalar_object( beta ); - bli_check_error_code( e_val ); + e_val = bli_check_scalar_object( beta ); + bli_check_error_code( e_val ); - e_val = bli_check_vector_object( x ); - bli_check_error_code( e_val ); + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); - e_val = bli_check_vector_object( y ); - bli_check_error_code( e_val ); + e_val = bli_check_vector_object( y ); + bli_check_error_code( e_val ); - e_val = bli_check_equal_vector_lengths( x, y ); - bli_check_error_code( e_val ); + e_val = bli_check_equal_vector_lengths( x, y ); + bli_check_error_code( e_val ); - // Check object buffers (for non-NULLness). + // Check object buffers (for non-NULLness). 
- e_val = bli_check_object_buffer( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( beta ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( beta ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( x ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( x ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( y ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( y ); + bli_check_error_code( e_val ); } void bli_l1v_dot_check diff --git a/frame/1/bli_l1v_check.h b/frame/1/bli_l1v_check.h index 1c87f5f51..d4a1e9ff9 100644 --- a/frame/1/bli_l1v_check.h +++ b/frame/1/bli_l1v_check.h @@ -58,25 +58,12 @@ GENTPROT( swapv ) void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ - obj_t* x, \ - obj_t* y \ - ); - -GENTPROT( axpyv ) -GENTPROT( scal2v ) - - -#undef GENTPROT -#define GENTPROT( opname ) \ -\ -void PASTEMAC(opname,_check) \ - ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ ); -GENTPROT( xpbyv ) +GENTPROT( axpbyv ) #undef GENTPROT @@ -86,11 +73,11 @@ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x, \ - obj_t* beta, \ obj_t* y \ - ); + ); -GENTPROT( axpbyv ) +GENTPROT( axpyv ) +GENTPROT( scal2v ) #undef GENTPROT @@ -145,6 +132,20 @@ GENTPROT( scalv ) GENTPROT( setv ) +#undef GENTPROT +#define GENTPROT( opname ) \ +\ +void PASTEMAC(opname,_check) \ + ( \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + ); + +GENTPROT( xpbyv ) + + + // ----------------------------------------------------------------------------- void bli_l1v_xy_check diff --git a/frame/1/bli_l1v_cntx.c b/frame/1/bli_l1v_cntx.c index d1c504528..a1bba0354 100644 --- a/frame/1/bli_l1v_cntx.c +++ b/frame/1/bli_l1v_cntx.c @@ -64,23 +64,48 @@ GENFRONT( subv, BLIS_SUBV_KER ) GENFRONT( swapv, BLIS_SWAPV_KER ) +#undef GENFRONT +#define GENFRONT( opname, kertype, dep1, dep2, dep3, dep4 ) \ +\ +void 
PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \ +{ \ + bli_cntx_obj_create( cntx ); \ +\ + /* Initialize the context with kernel dependencies. */ \ + PASTEMAC(dep1,_cntx_init)( cntx ); \ + PASTEMAC(dep2,_cntx_init)( cntx ); \ + PASTEMAC(dep3,_cntx_init)( cntx ); \ + PASTEMAC(dep4,_cntx_init)( cntx ); \ +\ + /* Initialize the context with the kernel associated with the current + operation. */ \ + bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ +} \ +void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \ +{ \ + bli_cntx_obj_free( cntx ); \ +} + +GENFRONT( axpbyv, BLIS_AXPBYV_KER, axpyv, xpbyv, scal2v, scalv ) + + #undef GENFRONT #define GENFRONT( opname, kertype, depname ) \ \ void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \ { \ - bli_cntx_obj_create( cntx ); \ + bli_cntx_obj_create( cntx ); \ \ - /* Initialize the context with kernel dependencies. */ \ - PASTEMAC(depname,_cntx_init)( cntx ); \ + /* Initialize the context with kernel dependencies. */ \ + PASTEMAC(depname,_cntx_init)( cntx ); \ \ - /* Initialize the context with the kernel associated with the current - operation. */ \ - bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ + /* Initialize the context with the kernel associated with the current + operation. */ \ + bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ } \ void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \ { \ - bli_cntx_obj_free( cntx ); \ + bli_cntx_obj_free( cntx ); \ } GENFRONT( axpyv, BLIS_AXPYV_KER, addv ) @@ -92,46 +117,21 @@ GENFRONT( scalv, BLIS_SCALV_KER, setv ) \ void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \ { \ - bli_cntx_obj_create( cntx ); \ + bli_cntx_obj_create( cntx ); \ \ - /* Initialize the context with kernel dependencies. */ \ - PASTEMAC(dep1,_cntx_init)( cntx ); \ - PASTEMAC(dep2,_cntx_init)( cntx ); \ + /* Initialize the context with kernel dependencies. */ \ + PASTEMAC(dep1,_cntx_init)( cntx ); \ + PASTEMAC(dep2,_cntx_init)( cntx ); \ \ - /* Initialize the context with the kernel associated with the current - operation. 
*/ \ - bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ + /* Initialize the context with the kernel associated with the current + operation. */ \ + bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ } \ void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \ { \ - bli_cntx_obj_free( cntx ); \ + bli_cntx_obj_free( cntx ); \ } GENFRONT( scal2v, BLIS_SCAL2V_KER, setv, copyv ) GENFRONT( xpbyv, BLIS_XPBYV_KER, addv, copyv ) - -#undef GENFRONT -#define GENFRONT( opname, kertype, dep1, dep2, dep3, dep4 ) \ -\ -void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \ -{ \ - bli_cntx_obj_create( cntx ); \ -\ - /* Initialize the context with kernel dependencies. */ \ - PASTEMAC(dep1,_cntx_init)( cntx ); \ - PASTEMAC(dep2,_cntx_init)( cntx ); \ - PASTEMAC(dep3,_cntx_init)( cntx ); \ - PASTEMAC(dep4,_cntx_init)( cntx ); \ -\ - /* Initialize the context with the kernel associated with the current - operation. */ \ - bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ -} \ -void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \ -{ \ - bli_cntx_obj_free( cntx ); \ -} - -GENFRONT( axpbyv, BLIS_AXPBYV_KER, axpyv, xpbyv, scal2v, scalv ) - diff --git a/frame/1/bli_l1v_ft.h b/frame/1/bli_l1v_ft.h index 051ca0f6c..c4e206df7 100644 --- a/frame/1/bli_l1v_ft.h +++ b/frame/1/bli_l1v_ft.h @@ -49,8 +49,8 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); @@ -58,41 +58,6 @@ INSERT_GENTDEF( addv ) INSERT_GENTDEF( copyv ) INSERT_GENTDEF( subv ) -// axpyv, scal2v - -#undef GENTDEF -#define GENTDEF( ctype, ch, opname, tsuf ) \ -\ -typedef void (*PASTECH2(ch,opname,tsuf)) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ - ); - -INSERT_GENTDEF( axpyv ) -INSERT_GENTDEF( scal2v ) - -// xpybv - -#undef GENTDEF -#define GENTDEF( ctype, ch, opname, tsuf ) \ -\ -typedef void 
(*PASTECH2(ch,opname,tsuf)) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ - ); - -INSERT_GENTDEF( xpbyv ) - // axpbyv #undef GENTDEF @@ -102,15 +67,33 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); INSERT_GENTDEF( axpbyv ) +// axpyv, scal2v + +#undef GENTDEF +#define GENTDEF( ctype, ch, opname, tsuf ) \ +\ +typedef void (*PASTECH2(ch,opname,tsuf)) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ + ); + +INSERT_GENTDEF( axpyv ) +INSERT_GENTDEF( scal2v ) + // dotv #undef GENTDEF @@ -121,9 +104,9 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* rho, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict rho, \ cntx_t* cntx \ ); @@ -139,11 +122,11 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* beta, \ - ctype* rho, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict beta, \ + ctype* restrict rho, \ cntx_t* cntx \ ); @@ -157,7 +140,7 @@ INSERT_GENTDEF( dotxv ) typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ dim_t n, \ - ctype* x, inc_t incx, \ + ctype* restrict x, inc_t incx, \ cntx_t* cntx \ ); @@ -172,8 +155,8 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ conj_t conjalpha, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ cntx_t* cntx \ ); @@ 
-188,14 +171,29 @@ INSERT_GENTDEF( setv ) typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); INSERT_GENTDEF( swapv ) +// xpybv +#undef GENTDEF +#define GENTDEF( ctype, ch, opname, tsuf ) \ +\ +typedef void (*PASTECH2(ch,opname,tsuf)) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ + ); + +INSERT_GENTDEF( xpbyv ) #endif diff --git a/frame/1/bli_l1v_ker.h b/frame/1/bli_l1v_ker.h index 13c675215..cf80eda46 100644 --- a/frame/1/bli_l1v_ker.h +++ b/frame/1/bli_l1v_ker.h @@ -42,11 +42,11 @@ \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjx, \ - dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( addv_ker_name ) @@ -59,12 +59,29 @@ INSERT_GENTPROT_BASIC( subv_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjx, \ - dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ + ); \ + +INSERT_GENTPROT_BASIC( axpbyv_ker_name ) + + +#undef GENTPROT +#define GENTPROT( ctype, ch, opname ) \ +\ +void PASTEMAC(ch,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ ); \ INSERT_GENTPROT_BASIC( axpyv_ker_name ) @@ -76,46 +93,13 @@ INSERT_GENTPROT_BASIC( scal2v_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjx, \ - dim_t n, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ - ); \ - -INSERT_GENTPROT_BASIC( 
xpbyv_ker_name ) - - -#undef GENTPROT -#define GENTPROT( ctype, ch, opname ) \ -\ -void PASTEMAC(ch,opname) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ - ); \ - -INSERT_GENTPROT_BASIC( axpbyv_ker_name ) - - -#undef GENTPROT -#define GENTPROT( ctype, ch, opname ) \ -\ -void PASTEMAC(ch,opname) \ - ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* rho, \ - cntx_t* cntx \ + conj_t conjx, \ + conj_t conjy, \ + dim_t n, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict rho, \ + cntx_t* cntx \ ); \ INSERT_GENTPROT_BASIC( dotv_ker_name ) @@ -126,15 +110,15 @@ INSERT_GENTPROT_BASIC( dotv_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* beta, \ - ctype* rho, \ - cntx_t* cntx \ + conj_t conjx, \ + conj_t conjy, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict beta, \ + ctype* restrict rho, \ + cntx_t* cntx \ ); \ INSERT_GENTPROT_BASIC( dotxv_ker_name ) @@ -145,9 +129,9 @@ INSERT_GENTPROT_BASIC( dotxv_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - dim_t n, \ - ctype* x, inc_t incx, \ - cntx_t* cntx \ + dim_t n, \ + ctype* restrict x, inc_t incx, \ + cntx_t* cntx \ ); \ INSERT_GENTPROT_BASIC( invertv_ker_name ) @@ -158,11 +142,11 @@ INSERT_GENTPROT_BASIC( invertv_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjalpha, \ - dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - cntx_t* cntx \ + conj_t conjalpha, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + cntx_t* cntx \ ); \ INSERT_GENTPROT_BASIC( scalv_ker_name ) @@ -174,11 +158,27 @@ INSERT_GENTPROT_BASIC( setv_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - dim_t n, \ - ctype* x, inc_t 
incx, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ + dim_t n, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ ); \ INSERT_GENTPROT_BASIC( swapv_ker_name ) + +#undef GENTPROT +#define GENTPROT( ctype, ch, opname ) \ +\ +void PASTEMAC(ch,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ + ); \ + +INSERT_GENTPROT_BASIC( xpbyv_ker_name ) + diff --git a/frame/1/bli_l1v_oapi.c b/frame/1/bli_l1v_oapi.c index c43551236..cebc3bfb5 100644 --- a/frame/1/bli_l1v_oapi.c +++ b/frame/1/bli_l1v_oapi.c @@ -82,6 +82,64 @@ GENFRONT( copyv ) GENFRONT( subv ) +#undef GENFRONT +#define GENFRONT( opname ) \ +\ +void PASTEMAC(opname,EX_SUF) \ + ( \ + obj_t* alpha, \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + BLIS_OAPI_CNTX_PARAM \ + ) \ +{ \ + BLIS_OAPI_CNTX_DECL \ +\ + num_t dt = bli_obj_datatype( *x ); \ +\ + conj_t conjx = bli_obj_conj_status( *x ); \ + dim_t n = bli_obj_vector_dim( *x ); \ + void* buf_x = bli_obj_buffer_at_off( *x ); \ + inc_t inc_x = bli_obj_vector_inc( *x ); \ + void* buf_y = bli_obj_buffer_at_off( *y ); \ + inc_t inc_y = bli_obj_vector_inc( *y ); \ +\ + void* buf_alpha; \ + void* buf_beta; \ +\ + obj_t alpha_local; \ + obj_t beta_local; \ +\ + if ( bli_error_checking_is_enabled() ) \ + PASTEMAC(opname,_check)( alpha, x, beta, y ); \ +\ + /* Create local copy-casts of scalars (and apply internal conjugation + as needed). */ \ + bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ + alpha, &alpha_local ); \ + bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ + beta, &beta_local ); \ + buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ + buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ +\ + /* Invoke the void pointer-based function. 
*/ \ + bli_call_ft_9 \ + ( \ + dt, \ + opname, \ + conjx, \ + n, \ + buf_alpha, \ + buf_x, inc_x, \ + buf_beta, \ + buf_y, inc_y, \ + cntx \ + ); \ +} + +GENFRONT( axpbyv ) + #undef GENFRONT #define GENFRONT( opname ) \ @@ -136,119 +194,6 @@ GENFRONT( axpyv ) GENFRONT( scal2v ) - -#undef GENFRONT -#define GENFRONT( opname ) \ -\ -void PASTEMAC(opname,EX_SUF) \ - ( \ - obj_t* x, \ - obj_t* beta, \ - obj_t* y \ - BLIS_OAPI_CNTX_PARAM \ - ) \ -{ \ - BLIS_OAPI_CNTX_DECL \ -\ - num_t dt = bli_obj_datatype( *x ); \ -\ - conj_t conjx = bli_obj_conj_status( *x ); \ - dim_t n = bli_obj_vector_dim( *x ); \ - void* buf_x = bli_obj_buffer_at_off( *x ); \ - inc_t inc_x = bli_obj_vector_inc( *x ); \ - void* buf_y = bli_obj_buffer_at_off( *y ); \ - inc_t inc_y = bli_obj_vector_inc( *y ); \ -\ - void* buf_beta; \ -\ - obj_t beta_local; \ -\ - if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( x, beta, y ); \ -\ - /* Create local copy-casts of scalars (and apply internal conjugation - as needed). */ \ - bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ - beta, &beta_local ); \ - buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ -\ - /* Invoke the void pointer-based function. 
*/ \ - bli_call_ft_8 \ - ( \ - dt, \ - opname, \ - conjx, \ - n, \ - buf_x, inc_x, \ - buf_beta, \ - buf_y, inc_y, \ - cntx \ - ); \ -} - -GENFRONT( xpbyv ) - - - -#undef GENFRONT -#define GENFRONT( opname ) \ -\ -void PASTEMAC(opname,EX_SUF) \ - ( \ - obj_t* alpha, \ - obj_t* x, \ - obj_t* beta, \ - obj_t* y \ - BLIS_OAPI_CNTX_PARAM \ - ) \ -{ \ - BLIS_OAPI_CNTX_DECL \ -\ - num_t dt = bli_obj_datatype( *x ); \ -\ - conj_t conjx = bli_obj_conj_status( *x ); \ - dim_t n = bli_obj_vector_dim( *x ); \ - void* buf_x = bli_obj_buffer_at_off( *x ); \ - inc_t inc_x = bli_obj_vector_inc( *x ); \ - void* buf_y = bli_obj_buffer_at_off( *y ); \ - inc_t inc_y = bli_obj_vector_inc( *y ); \ -\ - void* buf_alpha; \ - void* buf_beta; \ -\ - obj_t alpha_local; \ - obj_t beta_local; \ -\ - if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( alpha, x, beta, y ); \ -\ - /* Create local copy-casts of scalars (and apply internal conjugation - as needed). */ \ - bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ - alpha, &alpha_local ); \ - bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ - beta, &beta_local ); \ - buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ - buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ -\ - /* Invoke the void pointer-based function. 
*/ \ - bli_call_ft_9 \ - ( \ - dt, \ - opname, \ - conjx, \ - n, \ - buf_alpha, \ - buf_x, inc_x, \ - buf_beta, \ - buf_y, inc_y, \ - cntx \ - ); \ -} - -GENFRONT( axpbyv ) - - #undef GENFRONT #define GENFRONT( opname ) \ \ @@ -479,5 +424,57 @@ void PASTEMAC(opname,EX_SUF) \ GENFRONT( swapv ) +#undef GENFRONT +#define GENFRONT( opname ) \ +\ +void PASTEMAC(opname,EX_SUF) \ + ( \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + BLIS_OAPI_CNTX_PARAM \ + ) \ +{ \ + BLIS_OAPI_CNTX_DECL \ +\ + num_t dt = bli_obj_datatype( *x ); \ +\ + conj_t conjx = bli_obj_conj_status( *x ); \ + dim_t n = bli_obj_vector_dim( *x ); \ + void* buf_x = bli_obj_buffer_at_off( *x ); \ + inc_t inc_x = bli_obj_vector_inc( *x ); \ + void* buf_y = bli_obj_buffer_at_off( *y ); \ + inc_t inc_y = bli_obj_vector_inc( *y ); \ +\ + void* buf_beta; \ +\ + obj_t beta_local; \ +\ + if ( bli_error_checking_is_enabled() ) \ + PASTEMAC(opname,_check)( x, beta, y ); \ +\ + /* Create local copy-casts of scalars (and apply internal conjugation + as needed). */ \ + bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ + beta, &beta_local ); \ + buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ +\ + /* Invoke the void pointer-based function. 
*/ \ + bli_call_ft_8 \ + ( \ + dt, \ + opname, \ + conjx, \ + n, \ + buf_x, inc_x, \ + buf_beta, \ + buf_y, inc_y, \ + cntx \ + ); \ +} + +GENFRONT( xpbyv ) + + #endif diff --git a/frame/1/bli_l1v_oapi.h b/frame/1/bli_l1v_oapi.h index b6ec5094d..ff277421c 100644 --- a/frame/1/bli_l1v_oapi.h +++ b/frame/1/bli_l1v_oapi.h @@ -58,27 +58,13 @@ GENTPROT( subv ) void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ - obj_t* x, \ - obj_t* y \ - BLIS_OAPI_CNTX_PARAM \ - ); - -GENTPROT( axpyv ) -GENTPROT( scal2v ) - - -#undef GENTPROT -#define GENTPROT( opname ) \ -\ -void PASTEMAC(opname,EX_SUF) \ - ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ BLIS_OAPI_CNTX_PARAM \ ); -GENTPROT( xpbyv ) +GENTPROT( axpbyv ) #undef GENTPROT @@ -88,12 +74,12 @@ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x, \ - obj_t* beta, \ obj_t* y \ BLIS_OAPI_CNTX_PARAM \ - ); + ); -GENTPROT( axpbyv ) +GENTPROT( axpyv ) +GENTPROT( scal2v ) #undef GENTPROT @@ -164,3 +150,17 @@ void PASTEMAC(opname,EX_SUF) \ GENTPROT( swapv ) + +#undef GENTPROT +#define GENTPROT( opname ) \ +\ +void PASTEMAC(opname,EX_SUF) \ + ( \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + BLIS_OAPI_CNTX_PARAM \ + ); + +GENTPROT( xpbyv ) + diff --git a/frame/1/bli_l1v_tapi.c b/frame/1/bli_l1v_tapi.c index 551a41d18..4cf6be24e 100644 --- a/frame/1/bli_l1v_tapi.c +++ b/frame/1/bli_l1v_tapi.c @@ -74,6 +74,44 @@ INSERT_GENTFUNC_BASIC( copyv, BLIS_COPYV_KER ) INSERT_GENTFUNC_BASIC( subv, BLIS_SUBV_KER ) +#undef GENTFUNC +#define GENTFUNC( ctype, ch, opname, kerid ) \ +\ +void PASTEMAC(ch,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* alpha, \ + ctype* x, inc_t incx, \ + ctype* beta, \ + ctype* y, inc_t incy, \ + cntx_t* cntx \ + ) \ +{ \ + const num_t dt = PASTEMAC(ch,type); \ + cntx_t* cntx_p; \ +\ + bli_cntx_init_local_if( opname, cntx, cntx_p ); \ +\ + PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \ +\ + f \ + ( \ + conjx, \ + n, \ + alpha, \ + x, incx, \ + beta, \ + y, incy, \ + cntx_p \ + ); \ 
+\ + bli_cntx_finalize_local_if( opname, cntx ); \ +} + +INSERT_GENTFUNC_BASIC( axpbyv, BLIS_AXPBYV_KER ) + + #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ @@ -111,80 +149,6 @@ INSERT_GENTFUNC_BASIC( axpyv, BLIS_AXPYV_KER ) INSERT_GENTFUNC_BASIC( scal2v, BLIS_SCAL2V_KER ) -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, kerid ) \ -\ -void PASTEMAC(ch,opname) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ - ) \ -{ \ - const num_t dt = PASTEMAC(ch,type); \ - cntx_t* cntx_p; \ -\ - bli_cntx_init_local_if( opname, cntx, cntx_p ); \ -\ - PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \ -\ - f \ - ( \ - conjx, \ - n, \ - x, incx, \ - beta, \ - y, incy, \ - cntx_p \ - ); \ -\ - bli_cntx_finalize_local_if( opname, cntx ); \ -} - -INSERT_GENTFUNC_BASIC( xpbyv, BLIS_XPBYV_KER ) - - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, kerid ) \ -\ -void PASTEMAC(ch,opname) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ - ) \ -{ \ - const num_t dt = PASTEMAC(ch,type); \ - cntx_t* cntx_p; \ -\ - bli_cntx_init_local_if( opname, cntx, cntx_p ); \ -\ - PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \ -\ - f \ - ( \ - conjx, \ - n, \ - alpha, \ - x, incx, \ - beta, \ - y, incy, \ - cntx_p \ - ); \ -\ - bli_cntx_finalize_local_if( opname, cntx ); \ -} - -INSERT_GENTFUNC_BASIC( axpbyv, BLIS_AXPBYV_KER ) - - #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ @@ -361,3 +325,39 @@ void PASTEMAC(ch,opname) \ INSERT_GENTFUNC_BASIC( swapv, BLIS_SWAPV_KER ) +#undef GENTFUNC +#define GENTFUNC( ctype, ch, opname, kerid ) \ +\ +void PASTEMAC(ch,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* x, inc_t incx, \ + ctype* beta, \ + ctype* y, inc_t incy, \ + cntx_t* cntx \ + ) \ +{ \ + const num_t dt = PASTEMAC(ch,type); \ + 
cntx_t* cntx_p; \ +\ + bli_cntx_init_local_if( opname, cntx, cntx_p ); \ +\ + PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \ +\ + f \ + ( \ + conjx, \ + n, \ + x, incx, \ + beta, \ + y, incy, \ + cntx_p \ + ); \ +\ + bli_cntx_finalize_local_if( opname, cntx ); \ +} + +INSERT_GENTFUNC_BASIC( xpbyv, BLIS_XPBYV_KER ) + + diff --git a/frame/1/bli_l1v_tapi.h b/frame/1/bli_l1v_tapi.h index b50fe8eb9..b4b36b059 100644 --- a/frame/1/bli_l1v_tapi.h +++ b/frame/1/bli_l1v_tapi.h @@ -41,7 +41,7 @@ #define addv_ker_name addv #undef axpbyv_ker_name -#define axpbyv_ker_name axpbyv +#define axpbyv_ker_name axpbyv #undef axpyv_ker_name #define axpyv_ker_name axpyv diff --git a/frame/1/kernels/bli_addv_ref.c b/frame/1/kernels/bli_addv_ref.c index c18748ae9..aaf47859c 100644 --- a/frame/1/kernels/bli_addv_ref.c +++ b/frame/1/kernels/bli_addv_ref.c @@ -39,11 +39,11 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - dim_t n, \ + conj_t conjx, \ + dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -57,43 +57,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - if (incx == 1 && incy == 1) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,addjs)( chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,addjs)( *chi1, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if (incx == 1 && incy == 1) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,addjs)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,addjs)( *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if (incx == 1 && incy == 1) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,adds)( chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,adds)( *chi1, *psi1 ); \ - \ - chi1 += 
incx; \ - psi1 += incy; \ - } \ - } \ + if (incx == 1 && incy == 1) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,adds)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,adds)( *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_axpbyv_ref.c b/frame/1/kernels/bli_axpbyv_ref.c index 91d92c733..2751640af 100644 --- a/frame/1/kernels/bli_axpbyv_ref.c +++ b/frame/1/kernels/bli_axpbyv_ref.c @@ -54,193 +54,189 @@ void PASTEMAC(ch,varname) \ \ if ( bli_zero_dim1( n ) ) return; \ \ - if ( PASTEMAC(ch,eq0)( *alpha ) ) \ - { \ + if ( PASTEMAC(ch,eq0)( *alpha ) ) \ + { \ + /* If alpha is zero and beta is zero, set to zero. */ \ + if ( PASTEMAC(ch,eq0)( *beta ) ) \ + { \ + ctype* zero = PASTEMAC(ch,0); \ \ - /* If alpha is zero and beta is zero, set to zero. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ - { \ - ctype* zero = PASTEMAC(ch,0); \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,setv_ft) setv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SETV_KER, cntx ); \ \ - /* Query the context for the kernel function pointer. */ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,setv_ft) setv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SETV_KER, cntx ); \ + setv_p \ + ( \ + BLIS_NO_CONJUGATE, \ + n, \ + zero, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If alpha is zero and beta is one, return. */ \ + else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + { \ + return; \ + } \ + /* If alpha is zero, scale by beta. */ \ + else \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,scalv_ft) scalv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCALV_KER, cntx ); \ \ - setv_p \ - ( \ - BLIS_NO_CONJUGATE, \ - n, \ - zero, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ - /* If alpha is zero and beta is one, return. 
*/ \ - else if ( PASTEMAC(ch,eq1)( *beta ) ) \ - { \ - return; \ - } \ - /* If alpha is zero, scale by beta. */ \ - else \ - { \ - /* Query the context for the kernel function pointer. */ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,scalv_ft) scalv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCALV_KER, cntx ); \ + scalv_p \ + ( \ + BLIS_NO_CONJUGATE, \ + n, \ + beta, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ \ - scalv_p \ - ( \ - BLIS_NO_CONJUGATE, \ - n, \ - beta, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ + } \ + else if ( PASTEMAC(ch,eq1)( *alpha ) ) \ + { \ + /* If alpha is one and beta is zero, copy. */ \ + if ( PASTEMAC(ch,eq0)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ \ - } \ - else if ( PASTEMAC(ch,eq1)( *alpha ) ) \ - { \ - \ - /* If alpha is one and beta is zero, copy. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ - { \ - /* Query the context for the kernel function pointer. */ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ - \ - copyv_p \ - ( \ - conjx, \ - n, \ - x, incx, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ - /* If alpha is one and beta is one, add. */ \ - else if ( PASTEMAC(ch,eq1)( *beta ) ) \ - { \ - /* Query the context for the kernel function pointer. */ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,addv_ft) addv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_ADDV_KER, cntx ); \ - \ - addv_p \ - ( \ - conjx, \ - n, \ - x, incx, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ - /* If alpha is one, call xpby. */ \ - else \ - { \ - /* Query the context for the kernel function pointer. 
*/ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,xpbyv_ft) xpbyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_XPBYV_KER, cntx ); \ - \ - xpbyv_p \ - ( \ - conjx, \ - n, \ - x, incx, \ - beta, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ - \ - } \ - else \ - { \ - \ - /* If beta is zero, call scal2. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ - { \ - /* Query the context for the kernel function pointer. */ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,scal2v_ft) scal2v_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCAL2V_KER, cntx ); \ - \ - scal2v_p \ - ( \ - conjx, \ - n, \ - alpha, \ - x, incx, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ - /* If beta is one, call axpy. */ \ - else if ( PASTEMAC(ch,eq1)( *beta ) ) \ - { \ - /* Query the context for the kernel function pointer. */ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,axpyv_ft) axpyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ - \ - axpyv_p \ - ( \ - conjx, \ - n, \ - alpha, \ - x, incx, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ - \ - } \ + copyv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If alpha is one and beta is one, add. */ \ + else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,addv_ft) addv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_ADDV_KER, cntx ); \ +\ + addv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If alpha is one, call xpby. */ \ + else \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,xpbyv_ft) xpbyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_XPBYV_KER, cntx ); \ +\ + xpbyv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + beta, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + } \ + else \ + { \ + /* If beta is zero, call scal2. 
*/ \ + if ( PASTEMAC(ch,eq0)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,scal2v_ft) scal2v_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCAL2V_KER, cntx ); \ +\ + scal2v_p \ + ( \ + conjx, \ + n, \ + alpha, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If beta is one, call axpy. */ \ + else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,axpyv_ft) axpyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ +\ + axpyv_p \ + ( \ + conjx, \ + n, \ + alpha, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + \ + } \ \ chi1 = x; \ psi1 = y; \ \ if ( bli_is_conj( conjx ) ) \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpbyjs)( *alpha, chi1[i], *beta, psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpbyjs)( *alpha, *chi1, *beta, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpbyjs)( *alpha, chi1[i], *beta, psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpbyjs)( *alpha, *chi1, *beta, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpbys)( *alpha, chi1[i], *beta, psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpbys)( *alpha, *chi1, *beta, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpbys)( *alpha, chi1[i], *beta, psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpbys)( *alpha, *chi1, *beta, *psi1 ); 
\ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_axpyv_ref.c b/frame/1/kernels/bli_axpyv_ref.c index a6dd49976..f2733d7bc 100644 --- a/frame/1/kernels/bli_axpyv_ref.c +++ b/frame/1/kernels/bli_axpyv_ref.c @@ -39,12 +39,12 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - dim_t n, \ + conj_t conjx, \ + dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -79,43 +79,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpyjs)( *alpha, chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpyjs)( *alpha, *chi1, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpyjs)( *alpha, chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpyjs)( *alpha, *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpys)( *alpha, chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpys)( *alpha, *chi1, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpys)( *alpha, chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpys)( *alpha, *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_copyv_ref.c b/frame/1/kernels/bli_copyv_ref.c index 28d22c7b2..e364de57f 100644 --- a/frame/1/kernels/bli_copyv_ref.c +++ b/frame/1/kernels/bli_copyv_ref.c @@ -39,11 +39,11 
@@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - dim_t n, \ + conj_t conjx, \ + dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -57,43 +57,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copyjs)( chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copyjs)( *chi1, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copyjs)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copyjs)( *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copys)( chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copys)( *chi1, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copys)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copys)( *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_dotv_ref.c b/frame/1/kernels/bli_dotv_ref.c index 6f790c81c..61a4784da 100644 --- a/frame/1/kernels/bli_dotv_ref.c +++ b/frame/1/kernels/bli_dotv_ref.c @@ -39,13 +39,13 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ + conj_t conjx, \ + conj_t conjy, \ + dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ ctype* restrict rho, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -75,43 +75,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( 
conjx_use ) ) \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dotjs)( chi1[i], psi1[i], dotxy ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dotjs)( chi1[i], psi1[i], dotxy ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dots)( chi1[i], psi1[i], dotxy ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dots)( chi1[i], psi1[i], dotxy ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ \ if ( bli_is_conj( conjy ) ) \ diff --git a/frame/1/kernels/bli_dotxv_ref.c b/frame/1/kernels/bli_dotxv_ref.c index 89dedcc48..3e26f4ee8 100644 --- a/frame/1/kernels/bli_dotxv_ref.c +++ b/frame/1/kernels/bli_dotxv_ref.c @@ -39,15 +39,15 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ + conj_t conjx, \ + conj_t conjy, \ + dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ ctype* restrict beta, \ ctype* restrict rho, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -83,43 +83,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx_use ) ) \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dotjs)( chi1[i], psi1[i], dotxy ); \ - } \ - } \ - else 
\ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dotjs)( chi1[i], psi1[i], dotxy ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dots)( chi1[i], psi1[i], dotxy ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dots)( chi1[i], psi1[i], dotxy ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ \ if ( bli_is_conj( conjy ) ) \ diff --git a/frame/1/kernels/bli_invertv_ref.c b/frame/1/kernels/bli_invertv_ref.c index a79d8c9f0..9585a970c 100644 --- a/frame/1/kernels/bli_invertv_ref.c +++ b/frame/1/kernels/bli_invertv_ref.c @@ -39,9 +39,9 @@ \ void PASTEMAC(ch,varname) \ ( \ - dim_t n, \ + dim_t n, \ ctype* restrict x, inc_t incx, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -51,22 +51,22 @@ void PASTEMAC(ch,varname) \ \ chi1 = x; \ \ - if ( incx == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,inverts)( chi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,inverts)( *chi1 ); \ - \ - chi1 += incx; \ - } \ - } \ + if ( incx == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,inverts)( chi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,inverts)( *chi1 ); \ + \ + chi1 += incx; \ + } \ + } \ } INSERT_GENTFUNC_BASIC0( invertv_ref ) 
diff --git a/frame/1/kernels/bli_l1v_ref.h b/frame/1/kernels/bli_l1v_ref.h index 60cdbd2ee..51eb5b6c4 100644 --- a/frame/1/kernels/bli_l1v_ref.h +++ b/frame/1/kernels/bli_l1v_ref.h @@ -32,149 +32,48 @@ */ +// Redefine level-1v kernel API names to induce prototypes. -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* restrict x, inc_t incx, \ - ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ - ); +#undef addv_ker_name +#define addv_ker_name addv_ref -INSERT_GENTPROT_BASIC( addv_ref ) -INSERT_GENTPROT_BASIC( copyv_ref ) -INSERT_GENTPROT_BASIC( subv_ref ) +#undef axpbyv_ker_name +#define axpbyv_ker_name axpbyv_ref +#undef axpyv_ker_name +#define axpyv_ker_name axpyv_ref -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* restrict alpha, \ - ctype* restrict x, inc_t incx, \ - ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ - ); +#undef copyv_ker_name +#define copyv_ker_name copyv_ref -INSERT_GENTPROT_BASIC( axpyv_ref ) -INSERT_GENTPROT_BASIC( scal2v_ref ) +#undef dotv_ker_name +#define dotv_ker_name dotv_ref +#undef dotxv_ker_name +#define dotxv_ker_name dotxv_ref -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* restrict x, inc_t incx, \ - ctype* restrict beta, \ - ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ - ); +#undef invertv_ker_name +#define invertv_ker_name invertv_ref -INSERT_GENTPROT_BASIC( xpbyv_ref ) +#undef scalv_ker_name +#define scalv_ker_name scalv_ref +#undef scal2v_ker_name +#define scal2v_ker_name scal2v_ref -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* restrict alpha, \ - ctype* restrict x, inc_t incx, \ - ctype* restrict beta, \ - ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ - ); +#undef 
setv_ker_name +#define setv_ker_name setv_ref -INSERT_GENTPROT_BASIC( axpbyv_ref ) +#undef subv_ker_name +#define subv_ker_name subv_ref +#undef swapv_ker_name +#define swapv_ker_name swapv_ref -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ - ctype* restrict x, inc_t incx, \ - ctype* restrict y, inc_t incy, \ - ctype* restrict rho, \ - cntx_t* cntx \ - ); +#undef xpbyv_ker_name +#define xpbyv_ker_name xpbyv_ref -INSERT_GENTPROT_BASIC( dotv_ref ) +// Include the level-1v kernel API template. - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ - ctype* restrict alpha, \ - ctype* restrict x, inc_t incx, \ - ctype* restrict y, inc_t incy, \ - ctype* restrict beta, \ - ctype* restrict rho, \ - cntx_t* cntx \ - ); - -INSERT_GENTPROT_BASIC( dotxv_ref ) - - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - dim_t n, \ - ctype* restrict x, inc_t incx, \ - cntx_t* cntx \ - ); - -INSERT_GENTPROT_BASIC( invertv_ref ) - - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjalpha, \ - dim_t n, \ - ctype* restrict alpha, \ - ctype* restrict x, inc_t incx, \ - cntx_t* cntx \ - ); - -INSERT_GENTPROT_BASIC( scalv_ref ) -INSERT_GENTPROT_BASIC( setv_ref ) - - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - dim_t n, \ - ctype* restrict x, inc_t incx, \ - ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ - ); - -INSERT_GENTPROT_BASIC( swapv_ref ) +#include "bli_l1v_ker.h" diff --git a/frame/1/kernels/bli_scal2v_ref.c b/frame/1/kernels/bli_scal2v_ref.c index f4bc0d541..cb874d4ba 100644 --- a/frame/1/kernels/bli_scal2v_ref.c +++ b/frame/1/kernels/bli_scal2v_ref.c @@ -39,12 +39,12 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - dim_t 
n, \ + conj_t conjx, \ + dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -72,66 +72,66 @@ void PASTEMAC(ch,varname) \ ); \ return; \ } \ - /* If alpha is one, use copyv. */ \ - else if ( PASTEMAC(ch,eq0)( *alpha ) ) \ - { \ - /* Query the context for the kernel function pointer. */ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ + /* If alpha is one, use copyv (honoring conjx, as the general loops below do). */ \ + else if ( PASTEMAC(ch,eq1)( *alpha ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ \ - copyv_p \ - ( \ - BLIS_NO_CONJUGATE, \ - n, \ - x, incx, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ + copyv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ \ chi1 = x; \ psi1 = y; \ \ if ( bli_is_conj( conjx ) ) \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scal2js)( *alpha, chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scal2js)( *alpha, *chi1, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scal2js)( *alpha, chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scal2js)( *alpha, *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scal2s)( *alpha, chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scal2s)( *alpha, *chi1, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if (
incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scal2s)( *alpha, chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scal2s)( *alpha, *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_scalv_ref.c b/frame/1/kernels/bli_scalv_ref.c index 7d238c6f4..cc6817a43 100644 --- a/frame/1/kernels/bli_scalv_ref.c +++ b/frame/1/kernels/bli_scalv_ref.c @@ -39,11 +39,11 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjalpha, \ - dim_t n, \ + conj_t conjalpha, \ + dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -79,22 +79,22 @@ void PASTEMAC(ch,varname) \ \ chi1 = x; \ \ - if ( incx == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scals)( alpha_conj, chi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scals)( alpha_conj, *chi1 ); \ - \ - chi1 += incx; \ - } \ - } \ + if ( incx == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scals)( alpha_conj, chi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scals)( alpha_conj, *chi1 ); \ +\ + chi1 += incx; \ + } \ + } \ } INSERT_GENTFUNC_BASIC0( scalv_ref ) diff --git a/frame/1/kernels/bli_setv_ref.c b/frame/1/kernels/bli_setv_ref.c index 4f7d1db2d..19b5c4ffb 100644 --- a/frame/1/kernels/bli_setv_ref.c +++ b/frame/1/kernels/bli_setv_ref.c @@ -39,11 +39,11 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjalpha, \ - dim_t n, \ + conj_t conjalpha, \ + dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -56,43 +56,43 @@ void PASTEMAC(ch,varname) \ \ if ( PASTEMAC(ch,eq0)( *alpha ) ) \ { \ - if ( incx == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,set0s)( chi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < 
n; ++i ) \ - { \ - PASTEMAC(ch,set0s)( *chi1 ); \ - \ - chi1 += incx; \ - } \ - } \ + if ( incx == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,set0s)( chi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,set0s)( *chi1 ); \ +\ + chi1 += incx; \ + } \ + } \ } \ else \ { \ PASTEMAC(ch,copycjs)( conjalpha, *alpha, alpha_conj ); \ \ - if ( incx == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copys)( alpha_conj, chi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copys)( alpha_conj, *chi1 ); \ - \ - chi1 += incx; \ - } \ - } \ + if ( incx == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copys)( alpha_conj, chi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copys)( alpha_conj, *chi1 ); \ +\ + chi1 += incx; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_subv_ref.c b/frame/1/kernels/bli_subv_ref.c index bc59b01b6..cde01638a 100644 --- a/frame/1/kernels/bli_subv_ref.c +++ b/frame/1/kernels/bli_subv_ref.c @@ -39,11 +39,11 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - dim_t n, \ + conj_t conjx, \ + dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -57,43 +57,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,subjs)( chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,subjs)( *chi1, *psi1 ); \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,subjs)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,subjs)( *chi1, *psi1 ); \ \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if ( incx == 1 && incy == 1 
) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,subs)( chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,subs)( *chi1, *psi1 ); \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,subs)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,subs)( *chi1, *psi1 ); \ \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_swapv_ref.c b/frame/1/kernels/bli_swapv_ref.c index 6a43fb0d0..495ffd57b 100644 --- a/frame/1/kernels/bli_swapv_ref.c +++ b/frame/1/kernels/bli_swapv_ref.c @@ -39,10 +39,10 @@ \ void PASTEMAC(ch,varname) \ ( \ - dim_t n, \ + dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -54,23 +54,23 @@ void PASTEMAC(ch,varname) \ chi1 = x; \ psi1 = y; \ \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,swaps)( chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,swaps)( *chi1, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,swaps)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,swaps)( *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } INSERT_GENTFUNC_BASIC0( swapv_ref ) diff --git a/frame/1/kernels/bli_xpbyv_ref.c b/frame/1/kernels/bli_xpbyv_ref.c index 508e06ce8..e6d1b1ad5 100644 --- a/frame/1/kernels/bli_xpbyv_ref.c +++ b/frame/1/kernels/bli_xpbyv_ref.c @@ -39,12 +39,12 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - dim_t n, \ + conj_t conjx, \ + dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict beta, \ ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ + cntx_t* 
cntx \ ) \ { \ ctype* restrict chi1; \ @@ -55,21 +55,21 @@ void PASTEMAC(ch,varname) \ \ /* If beta is zero, use copyv. */ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ - { \ - /* Query the context for the kernel function pointer. */ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ \ - copyv_p \ - ( \ - conjx, \ - n, \ - x, incx, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ + copyv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ /* If alpha is one, use addv. */ \ else if ( PASTEMAC(ch,eq1)( *beta ) ) \ { \ @@ -93,43 +93,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,xpbyjs)( chi1[i], *beta, psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,xpbyjs)( *chi1, *beta, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,xpbyjs)( chi1[i], *beta, psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,xpbyjs)( *chi1, *beta, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,xpbys)( chi1[i], *beta, psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,xpbys)( *chi1, *beta, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,xpbys)( chi1[i], *beta, psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + 
PASTEMAC(ch,xpbys)( *chi1, *beta, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1f/bli_l1f_ft.h b/frame/1f/bli_l1f_ft.h index f8d15fc3c..5fa688b5d 100644 --- a/frame/1f/bli_l1f_ft.h +++ b/frame/1f/bli_l1f_ft.h @@ -50,11 +50,11 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ - ctype* alpha1, \ - ctype* alpha2, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ + ctype* restrict alpha1, \ + ctype* restrict alpha2, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict z, inc_t incz, \ cntx_t* cntx \ ); @@ -71,10 +71,10 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); @@ -91,11 +91,11 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* rho, \ - ctype* z, inc_t incz, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict rho, \ + ctype* restrict z, inc_t incz, \ cntx_t* cntx \ ); @@ -112,11 +112,11 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); @@ -135,13 +135,13 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - 
ctype* w, inc_t incw, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict w, inc_t incw, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict z, inc_t incz, \ cntx_t* cntx \ ); diff --git a/frame/1f/bli_l1f_ker.h b/frame/1f/bli_l1f_ker.h index 953aaf0af..9c040490e 100644 --- a/frame/1f/bli_l1f_ker.h +++ b/frame/1f/bli_l1f_ker.h @@ -42,15 +42,15 @@ \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ - ctype* alphax, \ - ctype* alphay, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ + conj_t conjx, \ + conj_t conjy, \ + dim_t n, \ + ctype* restrict alphax, \ + ctype* restrict alphay, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict z, inc_t incz, \ + cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( axpy2v_ker_name ) @@ -61,15 +61,15 @@ INSERT_GENTPROT_BASIC( axpy2v_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - conj_t conja, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ + conj_t conja, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( axpyf_ker_name ) @@ -80,16 +80,16 @@ INSERT_GENTPROT_BASIC( axpyf_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjxt, \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* rho, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ + conj_t conjxt, \ + conj_t conjx, \ + conj_t conjy, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t 
incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict rho, \ + ctype* restrict z, inc_t incz, \ + cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( dotaxpyv_ker_name ) @@ -100,20 +100,20 @@ INSERT_GENTPROT_BASIC( dotaxpyv_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjat, \ - conj_t conja, \ - conj_t conjw, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* w, inc_t incw, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ + conj_t conjat, \ + conj_t conja, \ + conj_t conjw, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict w, inc_t incw, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict z, inc_t incz, \ + cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( dotxaxpyf_ker_name ) @@ -124,16 +124,16 @@ INSERT_GENTPROT_BASIC( dotxaxpyf_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjat, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ + conj_t conjat, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( dotxf_ker_name ) diff --git a/frame/1f/kernels/bli_axpy2v_ref.c b/frame/1f/kernels/bli_axpy2v_ref.c index e91a510cb..a65558540 100644 --- a/frame/1f/kernels/bli_axpy2v_ref.c +++ b/frame/1f/kernels/bli_axpy2v_ref.c @@ -40,15 +40,15 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ - ctype* alphax, \ - ctype* alphay, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ + 
conj_t conjx, \ + conj_t conjy, \ + dim_t n, \ + ctype* restrict alphax, \ + ctype* restrict alphay, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict z, inc_t incz, \ + cntx_t* cntx \ ) \ { \ /* Query the context for the kernel function pointer. */ \ diff --git a/frame/1f/kernels/bli_axpyf_ref.c b/frame/1f/kernels/bli_axpyf_ref.c index 228d53823..72caaf695 100644 --- a/frame/1f/kernels/bli_axpyf_ref.c +++ b/frame/1f/kernels/bli_axpyf_ref.c @@ -40,15 +40,15 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ + conj_t conja, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ ) \ { \ ctype* a1; \ diff --git a/frame/1f/kernels/bli_dotaxpyv_ref.c b/frame/1f/kernels/bli_dotaxpyv_ref.c index 22893a5d4..bae1183c5 100644 --- a/frame/1f/kernels/bli_dotaxpyv_ref.c +++ b/frame/1f/kernels/bli_dotaxpyv_ref.c @@ -40,16 +40,16 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjxt, \ - conj_t conjx, \ - conj_t conjy, \ - dim_t m, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* rho, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ + conj_t conjxt, \ + conj_t conjx, \ + conj_t conjy, \ + dim_t m, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict rho, \ + ctype* restrict z, inc_t incz, \ + cntx_t* cntx \ ) \ { \ ctype* one = PASTEMAC(ch,1); \ diff --git a/frame/1f/kernels/bli_dotxaxpyf_ref_var1.c b/frame/1f/kernels/bli_dotxaxpyf_ref_var1.c index 4d2851fed..0de91f862 100644 --- a/frame/1f/kernels/bli_dotxaxpyf_ref_var1.c +++ b/frame/1f/kernels/bli_dotxaxpyf_ref_var1.c @@ -40,20 +40,20 @@ \ void PASTEMAC(ch,varname) \ ( \ - 
conj_t conjat, \ - conj_t conja, \ - conj_t conjw, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* w, inc_t incw, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ + conj_t conjat, \ + conj_t conja, \ + conj_t conjw, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict w, inc_t incw, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict z, inc_t incz, \ + cntx_t* cntx \ ) \ { \ ctype* a1; \ diff --git a/frame/1f/kernels/bli_dotxaxpyf_ref_var2.c b/frame/1f/kernels/bli_dotxaxpyf_ref_var2.c index 051e86f01..f0123c94b 100644 --- a/frame/1f/kernels/bli_dotxaxpyf_ref_var2.c +++ b/frame/1f/kernels/bli_dotxaxpyf_ref_var2.c @@ -40,20 +40,20 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjat, \ - conj_t conja, \ - conj_t conjw, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* w, inc_t incw, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ + conj_t conjat, \ + conj_t conja, \ + conj_t conjw, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict w, inc_t incw, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict z, inc_t incz, \ + cntx_t* cntx \ ) \ { \ /* A is m x n. 
*/ \ diff --git a/frame/1f/kernels/bli_dotxf_ref.c b/frame/1f/kernels/bli_dotxf_ref.c index 5e50847db..41866b87a 100644 --- a/frame/1f/kernels/bli_dotxf_ref.c +++ b/frame/1f/kernels/bli_dotxf_ref.c @@ -40,16 +40,16 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjat, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ + conj_t conjat, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ ) \ { \ ctype* a1; \ diff --git a/frame/1f/kernels/bli_l1f_ref.h b/frame/1f/kernels/bli_l1f_ref.h index 6a73ac5d1..899a4ba29 100644 --- a/frame/1f/kernels/bli_l1f_ref.h +++ b/frame/1f/kernels/bli_l1f_ref.h @@ -32,129 +32,24 @@ */ +// Redefine level-1f kernel API names to induce prototypes. -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ - ctype* alpha1, \ - ctype* alpha2, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ - ); +#undef axpy2v_ker_name +#define axpy2v_ker_name axpy2v_ref -INSERT_GENTPROT_BASIC( axpy2v_ref ) +#undef dotaxpyv_ker_name +#define dotaxpyv_ker_name dotaxpyv_ref +#undef axpyf_ker_name +#define axpyf_ker_name axpyf_ref -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conja, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ - ); +#undef dotxf_ker_name +#define dotxf_ker_name dotxf_ref -INSERT_GENTPROT_BASIC( axpyf_ref ) +#undef dotxaxpyf_ker_name +#define dotxaxpyf_ker_name dotxaxpyf_ref +// Include the level-1f kernel API template.
-#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjxt, \ - conj_t conjx, \ - conj_t conjy, \ - dim_t m, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* rho, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ - ); - -INSERT_GENTPROT_BASIC( dotaxpyv_ref ) - - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjat, \ - conj_t conja, \ - conj_t conjw, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* w, inc_t incw, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ - ); - -INSERT_GENTPROT_BASIC( dotxaxpyf_ref_var1 ) - - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjat, \ - conj_t conja, \ - conj_t conjw, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* w, inc_t incw, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ - ); - -INSERT_GENTPROT_BASIC( dotxaxpyf_ref_var2 ) - - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjat, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ - ); - -INSERT_GENTPROT_BASIC( dotxf_ref ) +#include "bli_l1f_ker.h" diff --git a/frame/include/bli_kernel_pre_macro_defs.h b/frame/include/bli_kernel_pre_macro_defs.h index 10f27ac54..98e4c3928 100644 --- a/frame/include/bli_kernel_pre_macro_defs.h +++ b/frame/include/bli_kernel_pre_macro_defs.h @@ -262,10 +262,10 @@ // axpbyv kernels -#define BLIS_SAXPBYV_KERNEL_REF bli_saxpbyv_ref -#define BLIS_DAXPBYV_KERNEL_REF bli_daxpbyv_ref -#define
BLIS_CAXPBYV_KERNEL_REF bli_caxpbyv_ref -#define BLIS_ZAXPBYV_KERNEL_REF bli_zaxpbyv_ref +#define BLIS_SAXPBYV_KERNEL_REF bli_saxpbyv_ref +#define BLIS_DAXPBYV_KERNEL_REF bli_daxpbyv_ref +#define BLIS_CAXPBYV_KERNEL_REF bli_caxpbyv_ref +#define BLIS_ZAXPBYV_KERNEL_REF bli_zaxpbyv_ref // axpyv kernels diff --git a/frame/include/bli_kernel_prototypes.h b/frame/include/bli_kernel_prototypes.h index 6a61f484d..d3524358c 100644 --- a/frame/include/bli_kernel_prototypes.h +++ b/frame/include/bli_kernel_prototypes.h @@ -108,6 +108,11 @@ #define bli_caddv_ker_name BLIS_CADDV_KERNEL #define bli_zaddv_ker_name BLIS_ZADDV_KERNEL +#define bli_saxpbyv_ker_name BLIS_SAXPBYV_KERNEL +#define bli_daxpbyv_ker_name BLIS_DAXPBYV_KERNEL +#define bli_caxpbyv_ker_name BLIS_CAXPBYV_KERNEL +#define bli_zaxpbyv_ker_name BLIS_ZAXPBYV_KERNEL + #define bli_saxpyv_ker_name BLIS_SAXPYV_KERNEL #define bli_daxpyv_ker_name BLIS_DAXPYV_KERNEL #define bli_caxpyv_ker_name BLIS_CAXPYV_KERNEL @@ -158,6 +163,11 @@ #define bli_cswapv_ker_name BLIS_CSWAPV_KERNEL #define bli_zswapv_ker_name BLIS_ZSWAPV_KERNEL +#define bli_sxpbyv_ker_name BLIS_SXPBYV_KERNEL +#define bli_dxpbyv_ker_name BLIS_DXPBYV_KERNEL +#define bli_cxpbyv_ker_name BLIS_CXPBYV_KERNEL +#define bli_zxpbyv_ker_name BLIS_ZXPBYV_KERNEL + #include "bli_l1v_ker.h" diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 5f921b79d..ffdcba56b 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -785,7 +785,7 @@ typedef enum typedef enum { BLIS_ADDV_KER = 0, - BLIS_AXPBYV_KER, + BLIS_AXPBYV_KER, BLIS_AXPYV_KER, BLIS_COPYV_KER, BLIS_DOTV_KER, @@ -796,7 +796,7 @@ typedef enum BLIS_SETV_KER, BLIS_SUBV_KER, BLIS_SWAPV_KER, - BLIS_XPBYV_KER, + BLIS_XPBYV_KER, } l1vkr_t; #define BLIS_NUM_LEVEL1V_KERS 13 diff --git a/testsuite/src/test_axpbyv.c b/testsuite/src/test_axpbyv.c index d9e3c18a0..805e092ec 100644 --- a/testsuite/src/test_axpbyv.c +++ b/testsuite/src/test_axpbyv.c @@ -79,12 +79,12 @@ void 
libblis_test_axpbyv_deps( test_params_t* params, test_op_t* op ) libblis_test_randv( params, &(op->ops->randv) ); libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_addv( params, &(op->ops->addv) ); - libblis_test_axpyv( params, &(op->ops->axpyv) ); + libblis_test_axpyv( params, &(op->ops->axpyv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_copyv( params, &(op->ops->copyv) ); - libblis_test_scalv( params, &(op->ops->scalv) ); - libblis_test_scal2v( params, &(op->ops->scal2v) ); - libblis_test_xpbyv( params, &(op->ops->xpbyv) ); + libblis_test_scalv( params, &(op->ops->scalv) ); + libblis_test_scal2v( params, &(op->ops->scal2v) ); + libblis_test_xpbyv( params, &(op->ops->xpbyv) ); } @@ -149,8 +149,8 @@ void libblis_test_axpbyv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test scalars. - bli_obj_scalar_init_detached( datatype, &alpha ); - bli_obj_scalar_init_detached( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -165,7 +165,7 @@ void libblis_test_axpbyv_experiment( test_params_t* params, else bli_setsc( 0.0, -2.0, &alpha ); - bli_setsc( -1.0, 0.0, &beta ); + bli_setsc( -1.0, 0.0, &beta ); // Randomize x and y, and save y. 
bli_randv( &x ); @@ -268,8 +268,8 @@ void libblis_test_axpbyv_check( obj_t* alpha, bli_copyv( x, &x_temp ); bli_copyv( y_orig, &y_temp ); - bli_scalv( alpha, &x_temp ); - bli_scalv( beta, &y_temp ); + bli_scalv( alpha, &x_temp ); + bli_scalv( beta, &y_temp ); bli_addv( &x_temp, &y_temp ); bli_subv( &y_temp, y ); diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index d48516a12..644c1c2d6 100644 --- a/testsuite/src/test_libblis.c +++ b/testsuite/src/test_libblis.c @@ -113,8 +113,8 @@ void libblis_test_utility_ops( test_params_t* params, test_ops_t* ops ) void libblis_test_level1v_ops( test_params_t* params, test_ops_t* ops ) { libblis_test_addv( params, &(ops->addv) ); - libblis_test_axpbyv( params, &(ops->axpbyv) ); - libblis_test_axpyv( params, &(ops->axpyv) ); + libblis_test_axpbyv( params, &(ops->axpbyv) ); + libblis_test_axpyv( params, &(ops->axpyv) ); libblis_test_copyv( params, &(ops->copyv) ); libblis_test_dotv( params, &(ops->dotv) ); libblis_test_dotxv( params, &(ops->dotxv) ); @@ -123,7 +123,7 @@ void libblis_test_level1v_ops( test_params_t* params, test_ops_t* ops ) libblis_test_scal2v( params, &(ops->scal2v) ); libblis_test_setv( params, &(ops->setv) ); libblis_test_subv( params, &(ops->subv) ); - libblis_test_xpbyv( params, &(ops->xpbyv) ); + libblis_test_xpbyv( params, &(ops->xpbyv) ); } @@ -222,7 +222,7 @@ void libblis_test_read_ops_file( char* input_filename, test_ops_t* ops ) // Level-1v libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->addv) ); - libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->axpbyv) ); + libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->axpbyv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->axpyv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->copyv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, 
BLIS_TEST_DIMS_M, 2, &(ops->dotv) ); @@ -232,7 +232,7 @@ void libblis_test_read_ops_file( char* input_filename, test_ops_t* ops ) libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->scal2v) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 0, &(ops->setv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->subv) ); - libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->xpbyv) ); + libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->xpbyv) ); // Level-1m libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 1, &(ops->addm) ); diff --git a/testsuite/src/test_libblis.h b/testsuite/src/test_libblis.h index 8a84a2d9b..86d90b99b 100644 --- a/testsuite/src/test_libblis.h +++ b/testsuite/src/test_libblis.h @@ -204,8 +204,8 @@ typedef struct test_ops_s // level-1v test_op_t addv; - test_op_t axpbyv; - test_op_t axpyv; + test_op_t axpbyv; + test_op_t axpyv; test_op_t copyv; test_op_t dotv; test_op_t dotxv; @@ -214,8 +214,8 @@ typedef struct test_ops_s test_op_t scal2v; test_op_t setv; test_op_t subv; - test_op_t xpbyv; - + test_op_t xpbyv; + // level-1m test_op_t addm; test_op_t axpym; diff --git a/testsuite/src/test_xpbyv.c b/testsuite/src/test_xpbyv.c index a6610905b..684fedf23 100644 --- a/testsuite/src/test_xpbyv.c +++ b/testsuite/src/test_xpbyv.c @@ -143,7 +143,7 @@ void libblis_test_xpbyv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test scalars. - bli_obj_scalar_init_detached( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). 
libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -255,7 +255,7 @@ void libblis_test_xpbyv_check( obj_t* x, bli_copyv( x, &x_temp ); bli_copyv( y_orig, &y_temp ); - bli_scalv( beta, &y_temp ); + bli_scalv( beta, &y_temp ); bli_addv( &x_temp, &y_temp ); bli_subv( &y_temp, y ); From ed7326c836f427e2f8420b015220ce293207b10c Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Wed, 27 Apr 2016 14:57:40 -0500 Subject: [PATCH 10/14] Added 'restrict' to l1v/l1f code in 'kernels' dir. Details: - Added 'restrict' keyword to existing kernel definitions in 'kernels' directory. These changes were meant for inclusion in bbb8569. --- frame/include/blis.h | 1 - kernels/bgq/1/bli_axpyv_opt_var1.c | 12 ++++---- kernels/bgq/1/bli_dotv_opt_var1.c | 14 +++++----- kernels/bgq/1f/bli_axpyf_opt_var1.c | 18 ++++++------ kernels/x86_64/penryn/1/bli_axpyv_opt_var1.c | 12 ++++---- kernels/x86_64/penryn/1/bli_dotv_opt_var1.c | 14 +++++----- .../x86_64/penryn/1f/bli_axpy2v_opt_var1.c | 18 ++++++------ kernels/x86_64/penryn/1f/bli_axpyf_opt_var1.c | 18 ++++++------ .../x86_64/penryn/1f/bli_dotaxpyv_opt_var1.c | 20 ++++++------- .../x86_64/penryn/1f/bli_dotxaxpyf_opt_var1.c | 28 +++++++++---------- kernels/x86_64/penryn/1f/bli_dotxf_opt_var1.c | 20 ++++++------- 11 files changed, 87 insertions(+), 88 deletions(-) diff --git a/frame/include/blis.h b/frame/include/blis.h index 94da7680c..e20fc5f73 100644 --- a/frame/include/blis.h +++ b/frame/include/blis.h @@ -81,7 +81,6 @@ extern "C" { // -- BLIS kernel definitions -- #include "bli_kernel.h" -//#include "bli_kernel_type_defs.h" #include "bli_kernel_pre_macro_defs.h" #include "bli_kernel_ind_pre_macro_defs.h" diff --git a/kernels/bgq/1/bli_axpyv_opt_var1.c b/kernels/bgq/1/bli_axpyv_opt_var1.c index 33cd4dc0a..7e2a5b6bd 100644 --- a/kernels/bgq/1/bli_axpyv_opt_var1.c +++ b/kernels/bgq/1/bli_axpyv_opt_var1.c @@ -36,12 +36,12 @@ void bli_daxpyv_opt_var1 ( - conj_t conjx, - dim_t n, - double* alpha, - double* x, inc_t incx, - 
double* y, inc_t incy, - cntx_t* cntx + conj_t conjx, + dim_t n, + double* restrict alpha, + double* restrict x, inc_t incx, + double* restrict y, inc_t incy, + cntx_t* cntx ) { if ( bli_zero_dim1( n ) ) return; diff --git a/kernels/bgq/1/bli_dotv_opt_var1.c b/kernels/bgq/1/bli_dotv_opt_var1.c index 674b1e653..1003c2aa0 100644 --- a/kernels/bgq/1/bli_dotv_opt_var1.c +++ b/kernels/bgq/1/bli_dotv_opt_var1.c @@ -36,13 +36,13 @@ void bli_ddotv_opt_var1 ( - conj_t conjx, - conj_t conjy, - dim_t n, - double* x, inc_t incx, - double* y, inc_t incy, - double* rho, - cntx_t* cntx + conj_t conjx, + conj_t conjy, + dim_t n, + double* restrict x, inc_t incx, + double* restrict y, inc_t incy, + double* restrict rho, + cntx_t* cntx ) { bool_t use_ref = FALSE; diff --git a/kernels/bgq/1f/bli_axpyf_opt_var1.c b/kernels/bgq/1f/bli_axpyf_opt_var1.c index ceff0f32c..25ca9920d 100644 --- a/kernels/bgq/1f/bli_axpyf_opt_var1.c +++ b/kernels/bgq/1f/bli_axpyf_opt_var1.c @@ -37,15 +37,15 @@ void bli_daxpyf_opt_var1 ( - conj_t conja, - conj_t conjx, - dim_t m, - dim_t b_n, - double* alpha, - double* a, inc_t inca, inc_t lda, - double* x, inc_t incx, - double* y, inc_t incy, - cntx_t* cntx + conj_t conja, + conj_t conjx, + dim_t m, + dim_t b_n, + double* restrict alpha, + double* restrict a, inc_t inca, inc_t lda, + double* restrict x, inc_t incx, + double* restrict y, inc_t incy, + cntx_t* cntx ) { const dim_t fusefac = 8; diff --git a/kernels/x86_64/penryn/1/bli_axpyv_opt_var1.c b/kernels/x86_64/penryn/1/bli_axpyv_opt_var1.c index 121068bd5..413b14e20 100644 --- a/kernels/x86_64/penryn/1/bli_axpyv_opt_var1.c +++ b/kernels/x86_64/penryn/1/bli_axpyv_opt_var1.c @@ -45,12 +45,12 @@ typedef union void bli_daxpyv_opt_var1 ( - conj_t conjx, - dim_t n, - double* alpha, - double* x, inc_t incx, - double* y, inc_t incy, - cntx_t* cntx + conj_t conjx, + dim_t n, + double* restrict alpha, + double* restrict x, inc_t incx, + double* restrict y, inc_t incy, + cntx_t* cntx ) { double* restrict alpha_cast 
= alpha; diff --git a/kernels/x86_64/penryn/1/bli_dotv_opt_var1.c b/kernels/x86_64/penryn/1/bli_dotv_opt_var1.c index 4ce09c4e3..52c532d9d 100644 --- a/kernels/x86_64/penryn/1/bli_dotv_opt_var1.c +++ b/kernels/x86_64/penryn/1/bli_dotv_opt_var1.c @@ -45,13 +45,13 @@ typedef union void bli_ddotv_opt_var1 ( - conj_t conjx, - conj_t conjy, - dim_t n, - double* x, inc_t incx, - double* y, inc_t incy, - double* rho, - cntx_t* cntx + conj_t conjx, + conj_t conjy, + dim_t n, + double* restrict x, inc_t incx, + double* restrict y, inc_t incy, + double* restrict rho, + cntx_t* cntx ) { double* restrict x_cast = x; diff --git a/kernels/x86_64/penryn/1f/bli_axpy2v_opt_var1.c b/kernels/x86_64/penryn/1f/bli_axpy2v_opt_var1.c index 9674dc4d8..29b7a149f 100644 --- a/kernels/x86_64/penryn/1f/bli_axpy2v_opt_var1.c +++ b/kernels/x86_64/penryn/1f/bli_axpy2v_opt_var1.c @@ -45,15 +45,15 @@ typedef union void bli_daxpy2v_int_var1 ( - conj_t conjx, - conj_t conjy, - dim_t n, - double* alpha, - double* beta, - double* x, inc_t incx, - double* y, inc_t incy, - double* z, inc_t incz, - cntx_t* cntx + conj_t conjx, + conj_t conjy, + dim_t n, + double* restrict alpha, + double* restrict beta, + double* restrict x, inc_t incx, + double* restrict y, inc_t incy, + double* restrict z, inc_t incz, + cntx_t* cntx ) { double* restrict alpha_cast = alpha; diff --git a/kernels/x86_64/penryn/1f/bli_axpyf_opt_var1.c b/kernels/x86_64/penryn/1f/bli_axpyf_opt_var1.c index 7c04b0efb..6d67e567a 100644 --- a/kernels/x86_64/penryn/1f/bli_axpyf_opt_var1.c +++ b/kernels/x86_64/penryn/1f/bli_axpyf_opt_var1.c @@ -45,15 +45,15 @@ typedef union void bli_daxpyf_int_var1 ( - conj_t conja, - conj_t conjx, - dim_t m, - dim_t b_n, - double* alpha, - double* a, inc_t inca, inc_t lda, - double* x, inc_t incx, - double* y, inc_t incy, - cntx_t* cntx + conj_t conja, + conj_t conjx, + dim_t m, + dim_t b_n, + double* restrict alpha, + double* restrict a, inc_t inca, inc_t lda, + double* restrict x, inc_t incx, + double* 
restrict y, inc_t incy, + cntx_t* cntx ) { double* restrict alpha_cast = alpha; diff --git a/kernels/x86_64/penryn/1f/bli_dotaxpyv_opt_var1.c b/kernels/x86_64/penryn/1f/bli_dotaxpyv_opt_var1.c index b42edef0b..a84d12c2d 100644 --- a/kernels/x86_64/penryn/1f/bli_dotaxpyv_opt_var1.c +++ b/kernels/x86_64/penryn/1f/bli_dotaxpyv_opt_var1.c @@ -45,16 +45,16 @@ typedef union void bli_ddotaxpyv_int_var1 ( - conj_t conjxt, - conj_t conjx, - conj_t conjy, - dim_t n, - double* alpha, - double* x, inc_t incx, - double* y, inc_t incy, - double* rho, - double* z, inc_t incz, - cntx_t* cntx + conj_t conjxt, + conj_t conjx, + conj_t conjy, + dim_t n, + double* restrict alpha, + double* restrict x, inc_t incx, + double* restrict y, inc_t incy, + double* restrict rho, + double* restrict z, inc_t incz, + cntx_t* cntx ) { double* restrict alpha_cast = alpha; diff --git a/kernels/x86_64/penryn/1f/bli_dotxaxpyf_opt_var1.c b/kernels/x86_64/penryn/1f/bli_dotxaxpyf_opt_var1.c index fc073b49b..69c638e62 100644 --- a/kernels/x86_64/penryn/1f/bli_dotxaxpyf_opt_var1.c +++ b/kernels/x86_64/penryn/1f/bli_dotxaxpyf_opt_var1.c @@ -45,20 +45,20 @@ typedef union void bli_ddotxaxpyf_int_var1 ( - conj_t conjat, - conj_t conja, - conj_t conjw, - conj_t conjx, - dim_t m, - dim_t b_n, - double* alpha, - double* a, inc_t inca, inc_t lda, - double* w, inc_t incw, - double* x, inc_t incx, - double* beta, - double* y, inc_t incy, - double* z, inc_t incz, - cntx_t* cntx + conj_t conjat, + conj_t conja, + conj_t conjw, + conj_t conjx, + dim_t m, + dim_t b_n, + double* restrict alpha, + double* restrict a, inc_t inca, inc_t lda, + double* restrict w, inc_t incw, + double* restrict x, inc_t incx, + double* restrict beta, + double* restrict y, inc_t incy, + double* restrict z, inc_t incz, + cntx_t* cntx ) { double* restrict alpha_cast = alpha; diff --git a/kernels/x86_64/penryn/1f/bli_dotxf_opt_var1.c b/kernels/x86_64/penryn/1f/bli_dotxf_opt_var1.c index af7f0ecbb..386863513 100644 --- 
a/kernels/x86_64/penryn/1f/bli_dotxf_opt_var1.c +++ b/kernels/x86_64/penryn/1f/bli_dotxf_opt_var1.c @@ -45,16 +45,16 @@ typedef union void bli_ddotxf_int_var1 ( - conj_t conjat, - conj_t conjx, - dim_t m, - dim_t b_n, - double* alpha, - double* a, inc_t inca, inc_t lda, - double* x, inc_t incx, - double* beta, - double* y, inc_t incy, - cntx_t* cntx + conj_t conjat, + conj_t conjx, + dim_t m, + dim_t b_n, + double* restrict alpha, + double* restrict a, inc_t inca, inc_t lda, + double* restrict x, inc_t incx, + double* restrict beta, + double* restrict y, inc_t incy, + cntx_t* cntx ) { double* restrict alpha_cast = alpha; From 0b01d355ae861754ae2da6c9a545474af010f02e Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Wed, 27 Apr 2016 15:21:10 -0500 Subject: [PATCH 11/14] Miscellaneous cleanups, fixes to recent commits. Details: - Fixed a typo in bli_l1f_ref.h, introduced into bbb8569, that only manifested when non-reference level-1f kernels were used. - Added an #undef BLIS_SIMD_ALIGN_SIZE to bli_kernel.h of dunnington configuration to prevent a compile-time warning until I can figure out the proper permanent fix. - Moved frame/1f/kernels/bli_dotxaxpyf_ref_var1.c out of the compilation path (into 'other' directory). _ref_var2 is used by default, which is the variant that is built on axpyf and dotxf instead of dotaxpyv. - Removed section of frame/include/bli_config_macro_defs.h pertaining to mixed datatype support. 
--- config/dunnington/bli_kernel.h | 1 + frame/1f/kernels/bli_l1f_ref.h | 14 +++++++------- .../kernels/{ => other}/bli_dotxaxpyf_ref_var1.c | 0 frame/include/bli_config_macro_defs.h | 13 ------------- 4 files changed, 8 insertions(+), 20 deletions(-) rename frame/1f/kernels/{ => other}/bli_dotxaxpyf_ref_var1.c (100%) diff --git a/config/dunnington/bli_kernel.h b/config/dunnington/bli_kernel.h index f5d846d9b..0ed2889de 100644 --- a/config/dunnington/bli_kernel.h +++ b/config/dunnington/bli_kernel.h @@ -38,6 +38,7 @@ // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- +#undef BLIS_SIMD_ALIGN_SIZE #define BLIS_SIMD_ALIGN_SIZE 16 // -- Cache blocksizes -- diff --git a/frame/1f/kernels/bli_l1f_ref.h b/frame/1f/kernels/bli_l1f_ref.h index 899a4ba29..592465845 100644 --- a/frame/1f/kernels/bli_l1f_ref.h +++ b/frame/1f/kernels/bli_l1f_ref.h @@ -35,21 +35,21 @@ // Redefine level-1f kernel API names to induce prototypes. #undef axpy2v_ker_name -#define axpy2v_ker_name axpy2v_ref +#define axpy2v_ker_name axpy2v_ref #undef dotaxpyv_ker_name -#define dotaxpyv_ker_name dotaxpyv_ref +#define dotaxpyv_ker_name dotaxpyv_ref #undef axpyf_ker_name -#define axpyf_ker_name axpyf_ref +#define axpyf_ker_name axpyf_ref #undef dotxf_ker_name -#define dotxf_ker_name dotxf_ref +#define dotxf_ker_name dotxf_ref -#undef dotxaxpy_ker_name -#define dotxaxpy_ker_name dotxaxpyf_ref +#undef dotxaxpyf_ker_name +#define dotxaxpyf_ker_name dotxaxpyf_ref_var2 // Include the level-1f kernel API template. 
-#include "bli_l1v_ker.h" +#include "bli_l1f_ker.h" diff --git a/frame/1f/kernels/bli_dotxaxpyf_ref_var1.c b/frame/1f/kernels/other/bli_dotxaxpyf_ref_var1.c similarity index 100% rename from frame/1f/kernels/bli_dotxaxpyf_ref_var1.c rename to frame/1f/kernels/other/bli_dotxaxpyf_ref_var1.c diff --git a/frame/include/bli_config_macro_defs.h b/frame/include/bli_config_macro_defs.h index a5bce35c0..a9d713571 100644 --- a/frame/include/bli_config_macro_defs.h +++ b/frame/include/bli_config_macro_defs.h @@ -137,19 +137,6 @@ #define BLIS_POOL_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE -// -- MIXED DATATYPE SUPPORT --------------------------------------------------- - -// Basic (homogeneous) datatype support always enabled. - -// AVOID ENABLING MIXED DATATYPE SUPPORT! IT IS PROBABLY BROKEN. - -// Enable mixed domain operations? -//#define BLIS_ENABLE_MIXED_DOMAIN_SUPPORT - -// Enable extra mixed precision operations? -//#define BLIS_ENABLE_MIXED_PRECISION_SUPPORT - - // -- MISCELLANEOUS OPTIONS ---------------------------------------------------- // Stay initialized after auto-initialization, unless and until the user From c3a4d39d03665135f1616588b5ef7c3e9ef5688d Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Wed, 4 May 2016 17:22:56 -0500 Subject: [PATCH 12/14] Updates to haswell gemm micro-kernels. Details: - Added two new sets of [sd]gemm micro-kernels for haswell architectures, one that is 4x24/4x12 (s and d) and one that is 6x16/6x8. - Changed the haswell configuration to use the 6x16/6x8 micro-kernels by default. - Updated various Makefiles, in test, test/3m4m, and testsuite. 
--- config/haswell/bli_kernel.h | 23 + frame/3/gemm/bli_gemm_ker_var2.c | 8 +- kernels/x86_64/haswell/3/bli_gemm_asm_d4x12.c | 1403 +++++++++++++++++ kernels/x86_64/haswell/3/bli_gemm_asm_d6x8.c | 1308 +++++++++++++++ kernels/x86_64/haswell/3/bli_gemm_asm_d8x6.c | 42 +- test/3m4m/Makefile | 76 +- test/Makefile | 65 +- testsuite/Makefile | 14 +- 8 files changed, 2818 insertions(+), 121 deletions(-) create mode 100644 kernels/x86_64/haswell/3/bli_gemm_asm_d4x12.c create mode 100644 kernels/x86_64/haswell/3/bli_gemm_asm_d6x8.c diff --git a/config/haswell/bli_kernel.h b/config/haswell/bli_kernel.h index ba0440e64..24a3c68b1 100644 --- a/config/haswell/bli_kernel.h +++ b/config/haswell/bli_kernel.h @@ -60,12 +60,23 @@ #else +/* #define BLIS_SGEMM_UKERNEL bli_sgemm_asm_16x6 #define BLIS_DEFAULT_MC_S 144 #define BLIS_DEFAULT_KC_S 256 #define BLIS_DEFAULT_NC_S 4080 #define BLIS_DEFAULT_MR_S 16 #define BLIS_DEFAULT_NR_S 6 +*/ + +#define BLIS_SGEMM_UKERNEL bli_sgemm_asm_6x16 +#define BLIS_DEFAULT_MC_S 144 +#define BLIS_DEFAULT_KC_S 256 +#define BLIS_DEFAULT_NC_S 4080 +#define BLIS_DEFAULT_MR_S 6 +#define BLIS_DEFAULT_NR_S 16 + +#define BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS #endif @@ -80,12 +91,24 @@ #else +/* #define BLIS_DGEMM_UKERNEL bli_dgemm_asm_8x6 #define BLIS_DEFAULT_MC_D 72 #define BLIS_DEFAULT_KC_D 256 #define BLIS_DEFAULT_NC_D 4080 #define BLIS_DEFAULT_MR_D 8 #define BLIS_DEFAULT_NR_D 6 +*/ + +#define BLIS_DGEMM_UKERNEL bli_dgemm_asm_6x8 +#define BLIS_DEFAULT_MC_D 72 +#define BLIS_DEFAULT_KC_D 256 +#define BLIS_DEFAULT_NC_D 4080 +#define BLIS_DEFAULT_MR_D 6 +#define BLIS_DEFAULT_NR_D 8 + +#define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS + #endif diff --git a/frame/3/gemm/bli_gemm_ker_var2.c b/frame/3/gemm/bli_gemm_ker_var2.c index 4fbe2a8bb..c86e9b9ad 100644 --- a/frame/3/gemm/bli_gemm_ker_var2.c +++ b/frame/3/gemm/bli_gemm_ker_var2.c @@ -322,9 +322,11 @@ void PASTEMAC(ch,varname) \ } \ } \ \ -/*PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: c", MR, NR, c11, rs_c, 
cs_c, "%4.1f", "" );*/ \ -/*PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: b1", k, NR, b1, NR, 1, "%4.1f", "" ); \ -PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: a1", MR, k, a1, 1, MR, "%4.1f", "" );*/ \ +/* +PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: b1", k, NR, b1, NR, 1, "%4.1f", "" ); \ +PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: a1", MR, k, a1, 1, MR, "%4.1f", "" ); \ +PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: c after", m_cur, n_cur, c11, rs_c, cs_c, "%4.1f", "" ); \ +*/ \ } INSERT_GENTFUNC_BASIC0( gemm_ker_var2 ) diff --git a/kernels/x86_64/haswell/3/bli_gemm_asm_d4x12.c b/kernels/x86_64/haswell/3/bli_gemm_asm_d4x12.c new file mode 100644 index 000000000..a1d2fc940 --- /dev/null +++ b/kernels/x86_64/haswell/3/bli_gemm_asm_d4x12.c @@ -0,0 +1,1403 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + + +#define SGEMM_INPUT_GS_BETA_NZ \ + "vmovlps (%%rcx ), %%xmm0, %%xmm0 \n\t" \ + "vmovhps (%%rcx,%%rsi,1), %%xmm0, %%xmm0 \n\t" \ + "vmovlps (%%rcx,%%rsi,2), %%xmm1, %%xmm1 \n\t" \ + "vmovhps (%%rcx,%%r13 ), %%xmm1, %%xmm1 \n\t" \ + "vshufps $0x88, %%xmm1, %%xmm0, %%xmm0 \n\t" \ + "vmovlps (%%rcx,%%rsi,4), %%xmm2, %%xmm2 \n\t" \ + "vmovhps (%%rcx,%%r15 ), %%xmm2, %%xmm2 \n\t" \ + "vmovlps (%%rcx,%%r13,2), %%xmm1, %%xmm1 \n\t" \ + "vmovhps (%%rcx,%%r10 ), %%xmm1, %%xmm1 \n\t" \ + "vshufps $0x88, %%xmm1, %%xmm2, %%xmm2 \n\t" \ + "vperm2f128 $0x20, %%ymm2, %%ymm0, %%ymm0 \n\t" + +#define SGEMM_OUTPUT_GS_BETA_NZ \ + "vextractf128 $1, %%ymm0, %%xmm2 \n\t" \ + "vmovss %%xmm0, (%%rcx ) \n\t" \ + "vpermilps $0x39, %%xmm0, %%xmm1 \n\t" \ + "vmovss %%xmm1, (%%rcx,%%rsi,1) \n\t" \ + "vpermilps $0x39, %%xmm1, %%xmm0 \n\t" \ + "vmovss %%xmm0, (%%rcx,%%rsi,2) \n\t" \ + "vpermilps $0x39, %%xmm0, %%xmm1 \n\t" \ + "vmovss %%xmm1, (%%rcx,%%r13 ) \n\t" \ + "vmovss %%xmm2, (%%rcx,%%rsi,4) \n\t" \ + "vpermilps $0x39, %%xmm2, %%xmm1 \n\t" \ + "vmovss %%xmm1, (%%rcx,%%r15 ) \n\t" \ + "vpermilps $0x39, %%xmm1, %%xmm2 \n\t" \ + "vmovss %%xmm2, (%%rcx,%%r13,2) \n\t" \ + "vpermilps $0x39, %%xmm2, %%xmm1 \n\t" \ + "vmovss %%xmm1, (%%rcx,%%r10 ) \n\t" + +void bli_sgemm_asm_4x24 + ( + dim_t k, + float* restrict alpha, + float* restrict a, + float* restrict b, + float* restrict beta, + float* restrict c, inc_t rs_c, inc_t cs_c, + auxinfo_t*
restrict data, + cntx_t* restrict cntx + ) +{ + //void* a_next = bli_auxinfo_next_a( data ); + //void* b_next = bli_auxinfo_next_b( data ); + + dim_t k_iter = k / 4; + dim_t k_left = k % 4; + + __asm__ volatile + ( + " \n\t" + "vzeroall \n\t" // zero all xmm/ymm registers. + " \n\t" + " \n\t" + "movq %2, %%rax \n\t" // load address of a. + "movq %3, %%rbx \n\t" // load address of b. + //"movq %9, %%r15 \n\t" // load address of b_next. + " \n\t" + "addq $32 * 4, %%rbx \n\t" + " \n\t" // initialize loop by pre-loading + "vmovaps -4 * 32(%%rbx), %%ymm1 \n\t" + "vmovaps -3 * 32(%%rbx), %%ymm2 \n\t" + "vmovaps -2 * 32(%%rbx), %%ymm3 \n\t" + " \n\t" + "movq %6, %%rcx \n\t" // load address of c + "movq %7, %%rdi \n\t" // load rs_c + "leaq (,%%rdi,4), %%rdi \n\t" // rs_c *= sizeof(float) + " \n\t" + "leaq (%%rdi,%%rdi,2), %%r13 \n\t" // r13 = 3*rs_c; + "prefetcht0 7 * 4(%%rcx) \n\t" // prefetch c + 0*rs_c + "prefetcht0 7 * 4(%%rcx,%%rdi) \n\t" // prefetch c + 1*rs_c + "prefetcht0 7 * 4(%%rcx,%%rdi,2) \n\t" // prefetch c + 2*rs_c + "prefetcht0 7 * 4(%%rcx,%%r13) \n\t" // prefetch c + 3*rs_c + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "movq %0, %%rsi \n\t" // i = k_iter; + "testq %%rsi, %%rsi \n\t" // check i via logical AND. + "je .SCONSIDKLEFT \n\t" // if i == 0, jump to code that + " \n\t" // contains the k_left loop. 
+ " \n\t" + " \n\t" + ".SLOOPKITER: \n\t" // MAIN LOOP + " \n\t" + " \n\t" + " \n\t" // iteration 0 + "prefetcht0 24 * 4(%%rax) \n\t" + " \n\t" + "vbroadcastf128 0 * 4(%%rax), %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm4 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm5 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm6 \n\t" + "vpermilps $0x93, %%ymm0, %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm7 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm8 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm9 \n\t" + "vpermilps $0x93, %%ymm0, %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm10 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm11 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm12 \n\t" + "vpermilps $0x93, %%ymm0, %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm13 \n\t" + "vmovaps -1 * 32(%%rbx), %%ymm1 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm14 \n\t" + "vmovaps 0 * 32(%%rbx), %%ymm2 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm15 \n\t" + "vmovaps 1 * 32(%%rbx), %%ymm3 \n\t" + " \n\t" + " \n\t" // iteration 1 + "vbroadcastf128 4 * 4(%%rax), %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm4 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm5 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm6 \n\t" + "vpermilps $0x93, %%ymm0, %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm7 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm8 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm9 \n\t" + "vpermilps $0x93, %%ymm0, %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm10 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm11 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm12 \n\t" + "vpermilps $0x93, %%ymm0, %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm13 \n\t" + "vmovaps 2 * 32(%%rbx), %%ymm1 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm14 \n\t" + "vmovaps 3 * 32(%%rbx), %%ymm2 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm15 \n\t" + "vmovaps 4 * 32(%%rbx), %%ymm3 \n\t" + " \n\t" + " \n\t" // iteration 2 + //"prefetcht0 32 * 4(%%rax) \n\t" + " \n\t" + "vbroadcastf128 8 * 4(%%rax), %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm4 \n\t" + 
"vfmadd231ps %%ymm2, %%ymm0, %%ymm5 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm6 \n\t" + "vpermilps $0x93, %%ymm0, %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm7 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm8 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm9 \n\t" + "vpermilps $0x93, %%ymm0, %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm10 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm11 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm12 \n\t" + "vpermilps $0x93, %%ymm0, %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm13 \n\t" + "vmovaps 5 * 32(%%rbx), %%ymm1 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm14 \n\t" + "vmovaps 6 * 32(%%rbx), %%ymm2 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm15 \n\t" + "vmovaps 7 * 32(%%rbx), %%ymm3 \n\t" + " \n\t" + " \n\t" // iteration 3 + "vbroadcastf128 12 * 4(%%rax), %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm4 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm5 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm6 \n\t" + "vpermilps $0x93, %%ymm0, %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm7 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm8 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm9 \n\t" + "vpermilps $0x93, %%ymm0, %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm10 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm11 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm12 \n\t" + "vpermilps $0x93, %%ymm0, %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm13 \n\t" + "vmovaps 8 * 32(%%rbx), %%ymm1 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm14 \n\t" + "vmovaps 9 * 32(%%rbx), %%ymm2 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm15 \n\t" + "vmovaps 10 * 32(%%rbx), %%ymm3 \n\t" + " \n\t" + " \n\t" + " \n\t" + "addq $4 * 4 * 4, %%rax \n\t" // a += 4*4 (unroll x mr) + "addq $4 * 24 * 4, %%rbx \n\t" // b += 4*24 (unroll x nr) + " \n\t" + " \n\t" + "decq %%rsi \n\t" // i -= 1; + "jne .SLOOPKITER \n\t" // iterate again if i != 0. 
+ " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + ".SCONSIDKLEFT: \n\t" + " \n\t" + "movq %1, %%rsi \n\t" // i = k_left; + "testq %%rsi, %%rsi \n\t" // check i via logical AND. + "je .SPOSTACCUM \n\t" // if i == 0, we're done; jump to end. + " \n\t" // else, we prepare to enter k_left loop. + " \n\t" + " \n\t" + ".SLOOPKLEFT: \n\t" // EDGE LOOP + " \n\t" + "prefetcht0 24 * 4(%%rax) \n\t" + " \n\t" + "vbroadcastf128 0 * 4(%%rax), %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm4 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm5 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm6 \n\t" + "vpermilps $0x93, %%ymm0, %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm7 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm8 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm9 \n\t" + "vpermilps $0x93, %%ymm0, %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm10 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm11 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm12 \n\t" + "vpermilps $0x93, %%ymm0, %%ymm0 \n\t" + "vfmadd231ps %%ymm1, %%ymm0, %%ymm13 \n\t" + "vmovaps -1 * 32(%%rbx), %%ymm1 \n\t" + "vfmadd231ps %%ymm2, %%ymm0, %%ymm14 \n\t" + "vmovaps 0 * 32(%%rbx), %%ymm2 \n\t" + "vfmadd231ps %%ymm3, %%ymm0, %%ymm15 \n\t" + "vmovaps 1 * 32(%%rbx), %%ymm3 \n\t" + " \n\t" + " \n\t" + " \n\t" + "addq $1 * 4 * 4, %%rax \n\t" // a += 1*4 (unroll x mr) + "addq $1 * 24 * 4, %%rbx \n\t" // b += 1*24 (unroll x nr) + " \n\t" + " \n\t" + "decq %%rsi \n\t" // i -= 1; + "jne .SLOOPKLEFT \n\t" // iterate again if i != 0. 
+ " \n\t" + " \n\t" + " \n\t" + ".SPOSTACCUM: \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "movq %4, %%rax \n\t" // load address of alpha + "movq %5, %%rbx \n\t" // load address of beta + "vbroadcastss (%%rax), %%ymm0 \n\t" // load alpha and duplicate + "vbroadcastss (%%rbx), %%ymm3 \n\t" // load beta and duplicate + " \n\t" + "vmulps %%ymm0, %%ymm4, %%ymm4 \n\t" // scale by alpha + "vmulps %%ymm0, %%ymm5, %%ymm5 \n\t" + "vmulps %%ymm0, %%ymm6, %%ymm6 \n\t" + "vmulps %%ymm0, %%ymm7, %%ymm7 \n\t" + "vmulps %%ymm0, %%ymm8, %%ymm8 \n\t" + "vmulps %%ymm0, %%ymm9, %%ymm9 \n\t" + "vmulps %%ymm0, %%ymm10, %%ymm10 \n\t" + "vmulps %%ymm0, %%ymm11, %%ymm11 \n\t" + "vmulps %%ymm0, %%ymm12, %%ymm12 \n\t" + "vmulps %%ymm0, %%ymm13, %%ymm13 \n\t" + "vmulps %%ymm0, %%ymm14, %%ymm14 \n\t" + "vmulps %%ymm0, %%ymm15, %%ymm15 \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" // ymm4 : ( ab00 ab11 ab22 ab33 ... ) + " \n\t" // ymm7 : ( ab30 ab01 ab12 ab23 ... ) + " \n\t" // ymm10: ( ab20 ab31 ab02 ab13 ... ) + " \n\t" // ymm13: ( ab10 ab21 ab32 ab03 ... 
) + " \n\t" + "vmovaps %%ymm4, %%ymm0 \n\t" + "vmovaps %%ymm7, %%ymm1 \n\t" + "vmovaps %%ymm10, %%ymm2 \n\t" + "vshufps $0xd8, %%ymm7, %%ymm4, %%ymm4 \n\t" + "vshufps $0x72, %%ymm13, %%ymm10, %%ymm7 \n\t" + "vshufps $0x8d, %%ymm13, %%ymm0, %%ymm10 \n\t" + "vshufps $0x27, %%ymm1, %%ymm2, %%ymm13 \n\t" + " \n\t" + "vmovaps %%ymm5, %%ymm0 \n\t" + "vmovaps %%ymm8, %%ymm1 \n\t" + "vmovaps %%ymm11, %%ymm2 \n\t" + "vshufps $0xd8, %%ymm8, %%ymm5, %%ymm5 \n\t" + "vshufps $0x72, %%ymm14, %%ymm11, %%ymm8 \n\t" + "vshufps $0x8d, %%ymm14, %%ymm0, %%ymm11 \n\t" + "vshufps $0x27, %%ymm1, %%ymm2, %%ymm14 \n\t" + " \n\t" + "vmovaps %%ymm6, %%ymm0 \n\t" + "vmovaps %%ymm9, %%ymm1 \n\t" + "vmovaps %%ymm12, %%ymm2 \n\t" + "vshufps $0xd8, %%ymm9, %%ymm6, %%ymm6 \n\t" + "vshufps $0x72, %%ymm15, %%ymm12, %%ymm9 \n\t" + "vshufps $0x8d, %%ymm15, %%ymm0, %%ymm12 \n\t" + "vshufps $0x27, %%ymm1, %%ymm2, %%ymm15 \n\t" + " \n\t" + " \n\t" // ymm4 : ( ab00 ab22 ab01 ab23 ... ) + " \n\t" // ymm7 : ( ab02 ab20 ab03 ab21 ... ) + " \n\t" // ymm10: ( ab11 ab33 ab10 ab32 ... ) + " \n\t" // ymm13: ( ab13 ab31 ab12 ab30 ... 
) + " \n\t" + "vmovaps %%ymm4, %%ymm0 \n\t" + "vmovaps %%ymm7, %%ymm1 \n\t" + "vmovaps %%ymm10, %%ymm2 \n\t" + "vshufps $0x88, %%ymm7, %%ymm4, %%ymm4 \n\t" + "vshufps $0x22, %%ymm13, %%ymm10, %%ymm7 \n\t" + "vshufps $0xdd, %%ymm0, %%ymm1, %%ymm10 \n\t" + "vshufps $0x77, %%ymm2, %%ymm13, %%ymm13 \n\t" + " \n\t" + "vmovaps %%ymm5, %%ymm0 \n\t" + "vmovaps %%ymm8, %%ymm1 \n\t" + "vmovaps %%ymm11, %%ymm2 \n\t" + "vshufps $0x88, %%ymm8, %%ymm5, %%ymm5 \n\t" + "vshufps $0x22, %%ymm14, %%ymm11, %%ymm8 \n\t" + "vshufps $0xdd, %%ymm0, %%ymm1, %%ymm11 \n\t" + "vshufps $0x77, %%ymm2, %%ymm14, %%ymm14 \n\t" + " \n\t" + "vmovaps %%ymm6, %%ymm0 \n\t" + "vmovaps %%ymm9, %%ymm1 \n\t" + "vmovaps %%ymm12, %%ymm2 \n\t" + "vshufps $0x88, %%ymm9, %%ymm6, %%ymm6 \n\t" + "vshufps $0x22, %%ymm15, %%ymm12, %%ymm9 \n\t" + "vshufps $0xdd, %%ymm0, %%ymm1, %%ymm12 \n\t" + "vshufps $0x77, %%ymm2, %%ymm15, %%ymm15 \n\t" + " \n\t" + " \n\t" // ymm4 : ( ab00 ab01 ab02 ab03 ... ) + " \n\t" // ymm7 : ( ab10 ab11 ab12 ab13 ... ) + " \n\t" // ymm10: ( ab20 ab21 ab22 ab23 ... ) + " \n\t" // ymm13: ( ab30 ab31 ab32 ab33 ... ) + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "movq %8, %%rsi \n\t" // load cs_c + "leaq (,%%rsi,4), %%rsi \n\t" // rsi = cs_c * sizeof(float) + " \n\t" + "leaq (%%rcx,%%rsi,8), %%rdx \n\t" // rdx = c + 8*cs_c; + "leaq (%%rdx,%%rsi,8), %%r12 \n\t" // r12 = c + 16*cs_c; + " \n\t" + "leaq (%%rsi,%%rsi,2), %%r13 \n\t" // r13 = 3*cs_c; + "leaq (%%rsi,%%rsi,4), %%r15 \n\t" // r15 = 5*cs_c; + "leaq (%%r13,%%rsi,4), %%r10 \n\t" // r10 = 7*cs_c; + " \n\t" + " \n\t" + " \n\t" + " \n\t" // determine if + " \n\t" // c % 32 == 0, AND + " \n\t" // 8*rs_c % 32 == 0, AND + " \n\t" // cs_c == 1 + " \n\t" // ie: aligned, ldim aligned, and + " \n\t" // row-stored + " \n\t" + "cmpq $4, %%rsi \n\t" // set ZF if (4*rs_c) == 4. + "sete %%bl \n\t" // bl = ( ZF == 1 ? 1 : 0 ); + "testq $31, %%rcx \n\t" // set ZF if c & 32 is zero. + "setz %%bh \n\t" // bh = ( ZF == 0 ? 
1 : 0 ); + "testq $31, %%rdi \n\t" // set ZF if (4*rs_c) & 32 is zero. + "setz %%al \n\t" // al = ( ZF == 0 ? 1 : 0 ); + " \n\t" // and(bl,bh) followed by + " \n\t" // and(bh,al) will reveal result + " \n\t" + " \n\t" // now avoid loading C if beta == 0 + " \n\t" + "vxorps %%ymm0, %%ymm0, %%ymm0 \n\t" // set ymm0 to zero. + "vucomiss %%xmm0, %%xmm3 \n\t" // set ZF if beta == 0. + "je .SBETAZERO \n\t" // if ZF = 1, jump to beta == 0 case + " \n\t" + " \n\t" + " \n\t" // check if aligned/row-stored + "andb %%bl, %%bh \n\t" // set ZF if bl & bh == 1. + "andb %%bh, %%al \n\t" // set ZF if bh & al == 1. + "jne .SROWSTORED \n\t" // jump to row storage case + " \n\t" + " \n\t" + " \n\t" + ".SGENSTORED: \n\t" + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm4, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm7, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm10, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm13, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "movq %%rdx, %%rcx \n\t" // rcx = c + 8*cs_c + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm5, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm8, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm11, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm14, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + 
//"addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "movq %%r12, %%rcx \n\t" // rcx = c + 16*cs_c + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm6, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm9, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm12, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm15, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "jmp .SDONE \n\t" // jump to end. + " \n\t" + " \n\t" + " \n\t" + ".SROWSTORED: \n\t" + " \n\t" + " \n\t" + "vmovaps (%%rcx), %%ymm0 \n\t" + "vfmadd213ps %%ymm4, %%ymm3, %%ymm0 \n\t" + "vmovaps %%ymm0, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps (%%rdx), %%ymm1 \n\t" + "vfmadd213ps %%ymm5, %%ymm3, %%ymm1 \n\t" + "vmovaps %%ymm1, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + "vmovaps (%%r12), %%ymm2 \n\t" + "vfmadd213ps %%ymm6, %%ymm3, %%ymm2 \n\t" + "vmovaps %%ymm2, (%%r12) \n\t" + "addq %%rdi, %%r12 \n\t" + " \n\t" + " \n\t" + "vmovaps (%%rcx), %%ymm0 \n\t" + "vfmadd213ps %%ymm7, %%ymm3, %%ymm0 \n\t" + "vmovaps %%ymm0, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps (%%rdx), %%ymm1 \n\t" + "vfmadd213ps %%ymm8, %%ymm3, %%ymm1 \n\t" + "vmovaps %%ymm1, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + "vmovaps (%%r12), %%ymm2 \n\t" + "vfmadd213ps %%ymm9, %%ymm3, %%ymm2 \n\t" + "vmovaps %%ymm2, (%%r12) \n\t" + "addq %%rdi, %%r12 \n\t" + " \n\t" + " \n\t" + "vmovaps (%%rcx), %%ymm0 \n\t" + "vfmadd213ps %%ymm10, %%ymm3, %%ymm0 \n\t" + "vmovaps %%ymm0, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps (%%rdx), %%ymm1 \n\t" + "vfmadd213ps %%ymm11, %%ymm3, %%ymm1 \n\t" + "vmovaps %%ymm1, (%%rdx) 
\n\t" + "addq %%rdi, %%rdx \n\t" + "vmovaps (%%r12), %%ymm2 \n\t" + "vfmadd213ps %%ymm12, %%ymm3, %%ymm2 \n\t" + "vmovaps %%ymm2, (%%r12) \n\t" + "addq %%rdi, %%r12 \n\t" + " \n\t" + " \n\t" + "vmovaps (%%rcx), %%ymm0 \n\t" + "vfmadd213ps %%ymm13, %%ymm3, %%ymm0 \n\t" + "vmovaps %%ymm0, (%%rcx) \n\t" + //"addq %%rdi, %%rcx \n\t" + "vmovaps (%%rdx), %%ymm1 \n\t" + "vfmadd213ps %%ymm14, %%ymm3, %%ymm1 \n\t" + "vmovaps %%ymm1, (%%rdx) \n\t" + //"addq %%rdi, %%rdx \n\t" + "vmovaps (%%r12), %%ymm2 \n\t" + "vfmadd213ps %%ymm15, %%ymm3, %%ymm2 \n\t" + "vmovaps %%ymm2, (%%r12) \n\t" + //"addq %%rdi, %%r12 \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "jmp .SDONE \n\t" // jump to end. + " \n\t" + " \n\t" + " \n\t" + ".SBETAZERO: \n\t" + " \n\t" // check if aligned/row-stored + "andb %%bl, %%bh \n\t" // set ZF if bl & bh == 1. + "andb %%bh, %%al \n\t" // set ZF if bh & al == 1. + "jne .SROWSTORBZ \n\t" // jump to row storage case + " \n\t" + " \n\t" + " \n\t" + ".SGENSTORBZ: \n\t" + " \n\t" + " \n\t" + "vmovaps %%ymm4, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm7, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm10, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm13, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "movq %%rdx, %%rcx \n\t" // rcx = c + 8*cs_c + " \n\t" + " \n\t" + "vmovaps %%ymm5, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm8, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm11, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm14, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, 
%%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "movq %%r12, %%rcx \n\t" // rcx = c + 16*cs_c + " \n\t" + " \n\t" + "vmovaps %%ymm6, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm9, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm12, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm15, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "jmp .SDONE \n\t" // jump to end. + " \n\t" + " \n\t" + " \n\t" + ".SROWSTORBZ: \n\t" + " \n\t" + " \n\t" + "vmovaps %%ymm4, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps %%ymm5, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + "vmovaps %%ymm6, (%%r12) \n\t" + "addq %%rdi, %%r12 \n\t" + " \n\t" + "vmovaps %%ymm7, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps %%ymm8, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + "vmovaps %%ymm9, (%%r12) \n\t" + "addq %%rdi, %%r12 \n\t" + " \n\t" + "vmovaps %%ymm10, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps %%ymm11, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + "vmovaps %%ymm12, (%%r12) \n\t" + "addq %%rdi, %%r12 \n\t" + " \n\t" + "vmovaps %%ymm13, (%%rcx) \n\t" + //"addq %%rdi, %%rcx \n\t" + "vmovaps %%ymm14, (%%rdx) \n\t" + //"addq %%rdi, %%rdx \n\t" + "vmovaps %%ymm15, (%%r12) \n\t" + //"addq %%rdi, %%r12 \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + ".SDONE: \n\t" + " \n\t" + + : // output operands (none) + : // input operands + "m" (k_iter), // 0 + "m" (k_left), // 1 + "m" (a), // 2 + "m" (b), // 3 + "m" (alpha), // 4 + "m" (beta), // 5 + "m" (c), // 6 + "m" (rs_c), // 7 + "m" (cs_c)/*, // 8 + "m" (b_next), // 9 + "m" (a_next)*/ // 10 + : // register clobber list + "rax", "rbx", "rcx", "rdx", "rsi", "rdi", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", 
"r15", + "xmm0", "xmm1", "xmm2", "xmm3", + "xmm4", "xmm5", "xmm6", "xmm7", + "xmm8", "xmm9", "xmm10", "xmm11", + "xmm12", "xmm13", "xmm14", "xmm15", + "memory" + ); +} + + + +#define DGEMM_INPUT_GS_BETA_NZ \ + "vmovlpd (%%rcx ), %%xmm0, %%xmm0 \n\t" \ + "vmovhpd (%%rcx,%%rsi,1), %%xmm0, %%xmm0 \n\t" \ + "vmovlpd (%%rcx,%%rsi,2), %%xmm1, %%xmm1 \n\t" \ + "vmovhpd (%%rcx,%%r13 ), %%xmm1, %%xmm1 \n\t" \ + "vperm2f128 $0x20, %%ymm1, %%ymm0, %%ymm0 \n\t" /*\ + "vmovlps (%%rcx,%%rsi,4), %%xmm2, %%xmm2 \n\t" \ + "vmovhps (%%rcx,%%r15 ), %%xmm2, %%xmm2 \n\t" \ + "vmovlps (%%rcx,%%r13,2), %%xmm1, %%xmm1 \n\t" \ + "vmovhps (%%rcx,%%r10 ), %%xmm1, %%xmm1 \n\t" \ + "vperm2f128 $0x20, %%ymm1, %%ymm2, %%ymm2 \n\t"*/ + +#define DGEMM_OUTPUT_GS_BETA_NZ \ + "vextractf128 $1, %%ymm0, %%xmm1 \n\t" \ + "vmovlpd %%xmm0, (%%rcx ) \n\t" \ + "vmovhpd %%xmm0, (%%rcx,%%rsi ) \n\t" \ + "vmovlpd %%xmm1, (%%rcx,%%rsi,2) \n\t" \ + "vmovhpd %%xmm1, (%%rcx,%%r13 ) \n\t" /*\ + "vextractf128 $1, %%ymm2, %%xmm1 \n\t" \ + "vmovlpd %%xmm2, (%%rcx,%%rsi,4) \n\t" \ + "vmovhpd %%xmm2, (%%rcx,%%r15 ) \n\t" \ + "vmovlpd %%xmm1, (%%rcx,%%r13,2) \n\t" \ + "vmovhpd %%xmm1, (%%rcx,%%r10 ) \n\t"*/ + +void bli_dgemm_asm_4x12 + ( + dim_t k, + double* restrict alpha, + double* restrict a, + double* restrict b, + double* restrict beta, + double* restrict c, inc_t rs_c, inc_t cs_c, + auxinfo_t* restrict data, + cntx_t* restrict cntx + ) +{ + //void* a_next = bli_auxinfo_next_a( data ); + //void* b_next = bli_auxinfo_next_b( data ); + + dim_t k_iter = k / 4; + dim_t k_left = k % 4; + + __asm__ volatile + ( + " \n\t" + "vzeroall \n\t" // zero all xmm/ymm registers. + " \n\t" + " \n\t" + "movq %2, %%rax \n\t" // load address of a. + "movq %3, %%rbx \n\t" // load address of b. + //"movq %9, %%r15 \n\t" // load address of b_next. 
+ " \n\t" + "addq $32 * 4, %%rbx \n\t" + " \n\t" // initialize loop by pre-loading + "vmovapd -4 * 32(%%rbx), %%ymm1 \n\t" + "vmovapd -3 * 32(%%rbx), %%ymm2 \n\t" + "vmovapd -2 * 32(%%rbx), %%ymm3 \n\t" + " \n\t" + "movq %6, %%rcx \n\t" // load address of c + "movq %7, %%rdi \n\t" // load rs_c + "leaq (,%%rdi,8), %%rdi \n\t" // rs_c *= sizeof(double) + " \n\t" + "leaq (%%rdi,%%rdi,2), %%r13 \n\t" // r13 = 3*rs_c; + "prefetcht0 7 * 8(%%rcx) \n\t" // prefetch c + 0*rs_c + "prefetcht0 7 * 8(%%rcx,%%rdi) \n\t" // prefetch c + 1*rs_c + "prefetcht0 7 * 8(%%rcx,%%rdi,2) \n\t" // prefetch c + 2*rs_c + "prefetcht0 7 * 8(%%rcx,%%r13) \n\t" // prefetch c + 3*rs_c + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "movq %0, %%rsi \n\t" // i = k_iter; + "testq %%rsi, %%rsi \n\t" // check i via logical AND. + "je .DCONSIDKLEFT \n\t" // if i == 0, jump to code that + " \n\t" // contains the k_left loop. + " \n\t" + " \n\t" + ".DLOOPKITER: \n\t" // MAIN LOOP + " \n\t" + " \n\t" + " \n\t" // iteration 0 + "prefetcht0 24 * 8(%%rax) \n\t" + " \n\t" + "vbroadcastf128 0 * 8(%%rax), %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm4 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm5 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm6 \n\t" + "vpermilpd $0x5, %%ymm0, %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm7 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm8 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm9 \n\t" + "vbroadcastf128 2 * 8(%%rax), %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm10 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm11 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm12 \n\t" + "vpermilpd $0x5, %%ymm0, %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm13 \n\t" + "vmovapd -1 * 32(%%rbx), %%ymm1 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm14 \n\t" + "vmovapd 0 * 32(%%rbx), %%ymm2 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm15 \n\t" + "vmovapd 1 * 32(%%rbx), %%ymm3 \n\t" + " \n\t" + " \n\t" // iteration 1 + "vbroadcastf128 4 * 8(%%rax), %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm4 \n\t" + "vfmadd231pd 
%%ymm2, %%ymm0, %%ymm5 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm6 \n\t" + "vpermilpd $0x5, %%ymm0, %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm7 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm8 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm9 \n\t" + "vbroadcastf128 6 * 8(%%rax), %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm10 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm11 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm12 \n\t" + "vpermilpd $0x5, %%ymm0, %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm13 \n\t" + "vmovapd 2 * 32(%%rbx), %%ymm1 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm14 \n\t" + "vmovapd 3 * 32(%%rbx), %%ymm2 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm15 \n\t" + "vmovapd 4 * 32(%%rbx), %%ymm3 \n\t" + " \n\t" + "prefetcht0 32 * 8(%%rax) \n\t" + " \n\t" // iteration 2 + "vbroadcastf128 8 * 8(%%rax), %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm4 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm5 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm6 \n\t" + "vpermilpd $0x5, %%ymm0, %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm7 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm8 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm9 \n\t" + "vbroadcastf128 10 * 8(%%rax), %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm10 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm11 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm12 \n\t" + "vpermilpd $0x5, %%ymm0, %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm13 \n\t" + "vmovapd 5 * 32(%%rbx), %%ymm1 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm14 \n\t" + "vmovapd 6 * 32(%%rbx), %%ymm2 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm15 \n\t" + "vmovapd 7 * 32(%%rbx), %%ymm3 \n\t" + " \n\t" + " \n\t" // iteration 3 + "vbroadcastf128 12 * 8(%%rax), %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm4 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm5 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm6 \n\t" + "vpermilpd $0x5, %%ymm0, %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm7 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm8 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm9 \n\t" + 
"vbroadcastf128 14 * 8(%%rax), %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm10 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm11 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm12 \n\t" + "vpermilpd $0x5, %%ymm0, %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm13 \n\t" + "vmovapd 8 * 32(%%rbx), %%ymm1 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm14 \n\t" + "vmovapd 9 * 32(%%rbx), %%ymm2 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm15 \n\t" + "vmovapd 10 * 32(%%rbx), %%ymm3 \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "addq $4 * 4 * 8, %%rax \n\t" // a += 4*4 (unroll x mr) + "addq $4 * 12 * 8, %%rbx \n\t" // b += 4*12 (unroll x nr) + " \n\t" + " \n\t" + "decq %%rsi \n\t" // i -= 1; + "jne .DLOOPKITER \n\t" // iterate again if i != 0. + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + ".DCONSIDKLEFT: \n\t" + " \n\t" + "movq %1, %%rsi \n\t" // i = k_left; + "testq %%rsi, %%rsi \n\t" // check i via logical AND. + "je .DPOSTACCUM \n\t" // if i == 0, we're done; jump to end. + " \n\t" // else, we prepare to enter k_left loop. 
+ " \n\t" + " \n\t" + ".DLOOPKLEFT: \n\t" // EDGE LOOP + " \n\t" + "prefetcht0 24 * 8(%%rax) \n\t" + " \n\t" + "vbroadcastf128 0 * 8(%%rax), %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm4 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm5 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm6 \n\t" + "vpermilpd $0x5, %%ymm0, %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm7 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm8 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm9 \n\t" + "vbroadcastf128 2 * 8(%%rax), %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm10 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm11 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm12 \n\t" + "vpermilpd $0x5, %%ymm0, %%ymm0 \n\t" + "vfmadd231pd %%ymm1, %%ymm0, %%ymm13 \n\t" + "vmovapd -1 * 32(%%rbx), %%ymm1 \n\t" + "vfmadd231pd %%ymm2, %%ymm0, %%ymm14 \n\t" + "vmovapd 0 * 32(%%rbx), %%ymm2 \n\t" + "vfmadd231pd %%ymm3, %%ymm0, %%ymm15 \n\t" + "vmovapd 1 * 32(%%rbx), %%ymm3 \n\t" + " \n\t" + " \n\t" + " \n\t" + "addq $1 * 4 * 8, %%rax \n\t" // a += 1*4 (unroll x mr) + "addq $1 * 12 * 8, %%rbx \n\t" // b += 1*12 (unroll x nr) + " \n\t" + " \n\t" + "decq %%rsi \n\t" // i -= 1; + "jne .DLOOPKLEFT \n\t" // iterate again if i != 0. 
+ " \n\t" + " \n\t" + " \n\t" + ".DPOSTACCUM: \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "movq %4, %%rax \n\t" // load address of alpha + "movq %5, %%rbx \n\t" // load address of beta + "vbroadcastsd (%%rax), %%ymm0 \n\t" // load alpha and duplicate + "vbroadcastsd (%%rbx), %%ymm3 \n\t" // load beta and duplicate + " \n\t" + "vmulpd %%ymm0, %%ymm4, %%ymm4 \n\t" // scale by alpha + "vmulpd %%ymm0, %%ymm5, %%ymm5 \n\t" + "vmulpd %%ymm0, %%ymm6, %%ymm6 \n\t" + "vmulpd %%ymm0, %%ymm7, %%ymm7 \n\t" + "vmulpd %%ymm0, %%ymm8, %%ymm8 \n\t" + "vmulpd %%ymm0, %%ymm9, %%ymm9 \n\t" + "vmulpd %%ymm0, %%ymm10, %%ymm10 \n\t" + "vmulpd %%ymm0, %%ymm11, %%ymm11 \n\t" + "vmulpd %%ymm0, %%ymm12, %%ymm12 \n\t" + "vmulpd %%ymm0, %%ymm13, %%ymm13 \n\t" + "vmulpd %%ymm0, %%ymm14, %%ymm14 \n\t" + "vmulpd %%ymm0, %%ymm15, %%ymm15 \n\t" + " \n\t" + " \n\t" // ymm4 : ( ab00 ab11 ab02 ab13 ) + " \n\t" // ymm7 : ( ab10 ab01 ab12 ab03 ) + " \n\t" // ymm10: ( ab20 ab31 ab22 ab33 ) + " \n\t" // ymm13: ( ab30 ab21 ab32 ab23 ) + " \n\t" + " \n\t" // ymm5 : ( ab04 ab15 ab06 ab17 ) + " \n\t" // ymm8 : ( ab14 ab05 ab16 ab07 ) + " \n\t" // ymm11: ( ab24 ab35 ab26 ab37 ) + " \n\t" // ymm14: ( ab34 ab25 ab36 ab27 ) + " \n\t" + " \n\t" // ymm6 : ( ab08 ab19 ab0A ab1B ) + " \n\t" // ymm9 : ( ab18 ab09 ab1A ab0B ) + " \n\t" // ymm12: ( ab28 ab39 ab2A ab3B ) + " \n\t" // ymm15: ( ab38 ab29 ab3A ab2B ) + "vmovapd %%ymm4, %%ymm0 \n\t" + "vshufpd $0xa, %%ymm7, %%ymm4, %%ymm4 \n\t" + "vshufpd $0xa, %%ymm0, %%ymm7, %%ymm7 \n\t" + " \n\t" + "vmovapd %%ymm5, %%ymm0 \n\t" + "vshufpd $0xa, %%ymm8, %%ymm5, %%ymm5 \n\t" + "vshufpd $0xa, %%ymm0, %%ymm8, %%ymm8 \n\t" + " \n\t" + "vmovapd %%ymm6, %%ymm0 \n\t" + "vshufpd $0xa, %%ymm9, %%ymm6, %%ymm6 \n\t" + "vshufpd $0xa, %%ymm0, %%ymm9, %%ymm9 \n\t" + " \n\t" + "vmovapd %%ymm10, %%ymm0 \n\t" + "vshufpd $0xa, %%ymm13, %%ymm10, %%ymm10 \n\t" + "vshufpd $0xa, %%ymm0, %%ymm13, %%ymm13 \n\t" + " \n\t" + "vmovapd %%ymm11, %%ymm0 \n\t" + "vshufpd $0xa, %%ymm14, %%ymm11, 
%%ymm11 \n\t" + "vshufpd $0xa, %%ymm0, %%ymm14, %%ymm14 \n\t" + " \n\t" + "vmovapd %%ymm12, %%ymm0 \n\t" + "vshufpd $0xa, %%ymm15, %%ymm12, %%ymm12 \n\t" + "vshufpd $0xa, %%ymm0, %%ymm15, %%ymm15 \n\t" + " \n\t" // ymm4 : ( ab00 ab01 ab02 ab03 ) + " \n\t" // ymm7 : ( ab10 ab11 ab12 ab13 ) + " \n\t" // ymm10: ( ab20 ab21 ab22 ab23 ) + " \n\t" // ymm13: ( ab30 ab31 ab32 ab33 ) + " \n\t" + " \n\t" // ymm5 : ( ab04 ab05 ab06 ab07 ) + " \n\t" // ymm8 : ( ab14 ab15 ab16 ab17 ) + " \n\t" // ymm11: ( ab24 ab25 ab26 ab27 ) + " \n\t" // ymm14: ( ab34 ab35 ab36 ab37 ) + " \n\t" + " \n\t" // ymm6 : ( ab08 ab09 ab0A ab0B ) + " \n\t" // ymm9 : ( ab18 ab19 ab1A ab1B ) + " \n\t" // ymm12: ( ab28 ab29 ab2A ab2B ) + " \n\t" // ymm15: ( ab38 ab39 ab3A ab3B ) + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + //"m" (rs_c), // 7 rdi + //"m" (cs_c), // 8 rsi + "movq %8, %%rsi \n\t" // load cs_c + "leaq (,%%rsi,8), %%rsi \n\t" // rsi = cs_c * sizeof(double) + " \n\t" + "leaq (%%rcx,%%rsi,4), %%rdx \n\t" // rdx = c + 4*cs_c; + "leaq (%%rcx,%%rsi,8), %%r12 \n\t" // r12 = c + 8*cs_c; + " \n\t" + "leaq (%%rsi,%%rsi,2), %%r13 \n\t" // r13 = 3*cs_c; + " \n\t" + " \n\t" + " \n\t" + " \n\t" // determine if + " \n\t" // c % 32 == 0, AND + " \n\t" // 8*rs_c % 32 == 0, AND + " \n\t" // cs_c == 1 + " \n\t" // ie: aligned, ldim aligned, and + " \n\t" // row-stored + " \n\t" + "cmpq $8, %%rsi \n\t" // set ZF if (8*cs_c) == 8. + "sete %%bl \n\t" // bl = ( ZF == 1 ? 1 : 0 ); + "testq $31, %%rcx \n\t" // set ZF if (c & 31) == 0, ie: c is 32-byte aligned. + "setz %%bh \n\t" // bh = ( ZF == 1 ? 1 : 0 ); + "testq $31, %%rdi \n\t" // set ZF if (8*rs_c & 31) == 0. + "setz %%al \n\t" // al = ( ZF == 1 ? 1 : 0 ); + " \n\t" // and(bl,bh) followed by + " \n\t" // and(bh,al) will reveal result + " \n\t" + " \n\t" // now avoid loading C if beta == 0 + " \n\t" + "vxorpd %%ymm0, %%ymm0, %%ymm0 \n\t" // set ymm0 to zero. + "vucomisd %%xmm0, %%xmm3 \n\t" // set ZF if beta == 0 (ZF is also set if beta is NaN). 
+ "je .DBETAZERO \n\t" // if ZF = 1, jump to beta == 0 case + " \n\t" + " \n\t" + " \n\t" // check if aligned/row-stored + "andb %%bl, %%bh \n\t" // set ZF if bl & bh == 1. + "andb %%bh, %%al \n\t" // set ZF if bh & al == 1. + "jne .DROWSTORED \n\t" // jump to row storage case + " \n\t" + " \n\t" + " \n\t" + ".DGENSTORED: \n\t" + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm4, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm7, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm10, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm13, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "movq %%rdx, %%rcx \n\t" // rcx = c + 4*cs_c + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm5, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm8, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm11, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm14, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "movq %%r12, %%rcx \n\t" // rcx = c + 8*cs_c + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm6, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm9, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq 
%%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm12, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm15, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "jmp .DDONE \n\t" // jump to end. + " \n\t" + " \n\t" + " \n\t" + ".DROWSTORED: \n\t" + " \n\t" + " \n\t" + "vmovapd (%%rcx), %%ymm0 \n\t" + "vfmadd213pd %%ymm4, %%ymm3, %%ymm0 \n\t" + "vmovapd %%ymm0, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovapd (%%rdx), %%ymm1 \n\t" + "vfmadd213pd %%ymm5, %%ymm3, %%ymm1 \n\t" + "vmovapd %%ymm1, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + "vmovapd (%%r12), %%ymm2 \n\t" + "vfmadd213pd %%ymm6, %%ymm3, %%ymm2 \n\t" + "vmovapd %%ymm2, (%%r12) \n\t" + "addq %%rdi, %%r12 \n\t" + " \n\t" + " \n\t" + "vmovapd (%%rcx), %%ymm0 \n\t" + "vfmadd213pd %%ymm7, %%ymm3, %%ymm0 \n\t" + "vmovapd %%ymm0, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovapd (%%rdx), %%ymm1 \n\t" + "vfmadd213pd %%ymm8, %%ymm3, %%ymm1 \n\t" + "vmovapd %%ymm1, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + "vmovapd (%%r12), %%ymm2 \n\t" + "vfmadd213pd %%ymm9, %%ymm3, %%ymm2 \n\t" + "vmovapd %%ymm2, (%%r12) \n\t" + "addq %%rdi, %%r12 \n\t" + " \n\t" + " \n\t" + "vmovapd (%%rcx), %%ymm0 \n\t" + "vfmadd213pd %%ymm10, %%ymm3, %%ymm0 \n\t" + "vmovapd %%ymm0, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovapd (%%rdx), %%ymm1 \n\t" + "vfmadd213pd %%ymm11, %%ymm3, %%ymm1 \n\t" + "vmovapd %%ymm1, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + "vmovapd (%%r12), %%ymm2 \n\t" + "vfmadd213pd %%ymm12, %%ymm3, %%ymm2 \n\t" + "vmovapd %%ymm2, (%%r12) \n\t" + "addq %%rdi, %%r12 \n\t" + " \n\t" + " \n\t" + "vmovapd (%%rcx), %%ymm0 \n\t" + "vfmadd213pd %%ymm13, %%ymm3, %%ymm0 \n\t" + "vmovapd %%ymm0, (%%rcx) \n\t" + //"addq %%rdi, %%rcx \n\t" + "vmovapd (%%rdx), %%ymm1 \n\t" + "vfmadd213pd %%ymm14, 
%%ymm3, %%ymm1 \n\t" + "vmovapd %%ymm1, (%%rdx) \n\t" + //"addq %%rdi, %%rdx \n\t" + "vmovapd (%%r12), %%ymm2 \n\t" + "vfmadd213pd %%ymm15, %%ymm3, %%ymm2 \n\t" + "vmovapd %%ymm2, (%%r12) \n\t" + //"addq %%rdi, %%r12 \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "jmp .DDONE \n\t" // jump to end. + " \n\t" + " \n\t" + " \n\t" + ".DBETAZERO: \n\t" + " \n\t" // check if aligned/row-stored + "andb %%bl, %%bh \n\t" // set ZF if bl & bh == 1. + "andb %%bh, %%al \n\t" // set ZF if bh & al == 1. + "jne .DROWSTORBZ \n\t" // jump to row storage case + " \n\t" + " \n\t" + " \n\t" + ".DGENSTORBZ: \n\t" + " \n\t" + " \n\t" + "vmovapd %%ymm4, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovapd %%ymm7, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovapd %%ymm10, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovapd %%ymm13, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "movq %%rdx, %%rcx \n\t" // rcx = c + 4*cs_c + " \n\t" + " \n\t" + "vmovapd %%ymm5, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovapd %%ymm8, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovapd %%ymm11, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovapd %%ymm14, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "movq %%r12, %%rcx \n\t" // rcx = c + 8*cs_c + " \n\t" + " \n\t" + "vmovapd %%ymm6, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovapd %%ymm9, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovapd %%ymm12, %%ymm0 \n\t" + 
DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovapd %%ymm15, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "jmp .DDONE \n\t" // jump to end. + " \n\t" + " \n\t" + " \n\t" + ".DROWSTORBZ: \n\t" + " \n\t" + " \n\t" + "vmovapd %%ymm4, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovapd %%ymm5, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + "vmovapd %%ymm6, (%%r12) \n\t" + "addq %%rdi, %%r12 \n\t" + " \n\t" + "vmovapd %%ymm7, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovapd %%ymm8, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + "vmovapd %%ymm9, (%%r12) \n\t" + "addq %%rdi, %%r12 \n\t" + " \n\t" + "vmovapd %%ymm10, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovapd %%ymm11, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + "vmovapd %%ymm12, (%%r12) \n\t" + "addq %%rdi, %%r12 \n\t" + " \n\t" + "vmovapd %%ymm13, (%%rcx) \n\t" + //"addq %%rdi, %%rcx \n\t" + "vmovapd %%ymm14, (%%rdx) \n\t" + //"addq %%rdi, %%rdx \n\t" + "vmovapd %%ymm15, (%%r12) \n\t" + //"addq %%rdi, %%r12 \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + ".DDONE: \n\t" + " \n\t" + + : // output operands (none) + : // input operands + "m" (k_iter), // 0 + "m" (k_left), // 1 + "m" (a), // 2 + "m" (b), // 3 + "m" (alpha), // 4 + "m" (beta), // 5 + "m" (c), // 6 + "m" (rs_c), // 7 + "m" (cs_c)/*, // 8 + "m" (b_next), // 9 + "m" (a_next)*/ // 10 + : // register clobber list + "rax", "rbx", "rcx", "rdx", "rsi", "rdi", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "xmm0", "xmm1", "xmm2", "xmm3", + "xmm4", "xmm5", "xmm6", "xmm7", + "xmm8", "xmm9", "xmm10", "xmm11", + "xmm12", "xmm13", "xmm14", "xmm15", + "memory" + ); +} + +#if 0 + +void bli_cgemm_asm_ + ( + dim_t k, + scomplex* restrict alpha, + scomplex* restrict a, + scomplex* restrict b, + scomplex* restrict beta, + scomplex* restrict c, inc_t rs_c, inc_t cs_c, + auxinfo_t* 
restrict data, + cntx_t* restrict cntx + ) +{ + //void* a_next = bli_auxinfo_next_a( data ); + //void* b_next = bli_auxinfo_next_b( data ); + + //dim_t k_iter = k / 4; + //dim_t k_left = k % 4; + +} + + + +void bli_zgemm_asm_ + ( + dim_t k, + dcomplex* restrict alpha, + dcomplex* restrict a, + dcomplex* restrict b, + dcomplex* restrict beta, + dcomplex* restrict c, inc_t rs_c, inc_t cs_c, + auxinfo_t* restrict data, + cntx_t* restrict cntx + ) +{ + //void* a_next = bli_auxinfo_next_a( data ); + //void* b_next = bli_auxinfo_next_b( data ); + + //dim_t k_iter = k / 4; + //dim_t k_left = k % 4; + +} + +#endif diff --git a/kernels/x86_64/haswell/3/bli_gemm_asm_d6x8.c b/kernels/x86_64/haswell/3/bli_gemm_asm_d6x8.c new file mode 100644 index 000000000..1fb390a6a --- /dev/null +++ b/kernels/x86_64/haswell/3/bli_gemm_asm_d6x8.c @@ -0,0 +1,1308 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + + +#define SGEMM_INPUT_GS_BETA_NZ \ + "vmovlps (%%rcx ), %%xmm0, %%xmm0 \n\t" \ + "vmovhps (%%rcx,%%rsi,1), %%xmm0, %%xmm0 \n\t" \ + "vmovlps (%%rcx,%%rsi,2), %%xmm1, %%xmm1 \n\t" \ + "vmovhps (%%rcx,%%r13 ), %%xmm1, %%xmm1 \n\t" \ + "vshufps $0x88, %%xmm1, %%xmm0, %%xmm0 \n\t" \ + "vmovlps (%%rcx,%%rsi,4), %%xmm2, %%xmm2 \n\t" \ + "vmovhps (%%rcx,%%r15 ), %%xmm2, %%xmm2 \n\t" \ + "vmovlps (%%rcx,%%r13,2), %%xmm1, %%xmm1 \n\t" \ + "vmovhps (%%rcx,%%r10 ), %%xmm1, %%xmm1 \n\t" \ + "vshufps $0x88, %%xmm1, %%xmm2, %%xmm2 \n\t" \ + "vperm2f128 $0x20, %%ymm2, %%ymm0, %%ymm0 \n\t" + +#define SGEMM_OUTPUT_GS_BETA_NZ \ + "vextractf128 $1, %%ymm0, %%xmm2 \n\t" \ + "vmovss %%xmm0, (%%rcx ) \n\t" \ + "vpermilps $0x39, %%xmm0, %%xmm1 \n\t" \ + "vmovss %%xmm1, (%%rcx,%%rsi,1) \n\t" \ + "vpermilps $0x39, %%xmm1, %%xmm0 \n\t" \ + "vmovss %%xmm0, (%%rcx,%%rsi,2) \n\t" \ + "vpermilps $0x39, %%xmm0, %%xmm1 \n\t" \ + "vmovss %%xmm1, (%%rcx,%%r13 ) \n\t" \ + "vmovss %%xmm2, (%%rcx,%%rsi,4) \n\t" \ + "vpermilps $0x39, %%xmm2, %%xmm1 \n\t" \ + "vmovss %%xmm1, (%%rcx,%%r15 ) \n\t" \ + "vpermilps $0x39, %%xmm1, %%xmm2 \n\t" \ + "vmovss %%xmm2, (%%rcx,%%r13,2) \n\t" \ + "vpermilps $0x39, %%xmm2, %%xmm1 \n\t" \ + "vmovss %%xmm1, (%%rcx,%%r10 ) \n\t" + +void bli_sgemm_asm_6x16 + ( + dim_t k, + float* restrict alpha, + float* restrict a, + float* restrict b, + float* restrict beta, + float* restrict c, inc_t rs_c, inc_t cs_c, + auxinfo_t*
restrict data, + cntx_t* restrict cntx + ) +{ + //void* a_next = bli_auxinfo_next_a( data ); + //void* b_next = bli_auxinfo_next_b( data ); + // Computes the 6x16 block c := beta*c + alpha*a*b; k is split into k_iter 4x-unrolled iterations plus k_left single updates. + dim_t k_iter = k / 4; + dim_t k_left = k % 4; + + __asm__ volatile + ( + " \n\t" + "vzeroall \n\t" // zero all xmm/ymm registers. + " \n\t" + " \n\t" + "movq %2, %%rax \n\t" // load address of a. + "movq %3, %%rbx \n\t" // load address of b. + //"movq %9, %%r15 \n\t" // load address of b_next. + " \n\t" + "addq $32 * 4, %%rbx \n\t" // bias b by 128 bytes; loads of b below use offsets -4*32 .. 3*32. + " \n\t" // initialize loop by pre-loading + "vmovaps -4 * 32(%%rbx), %%ymm0 \n\t" + "vmovaps -3 * 32(%%rbx), %%ymm1 \n\t" + " \n\t" + "movq %6, %%rcx \n\t" // load address of c + "movq %7, %%rdi \n\t" // load rs_c + "leaq (,%%rdi,4), %%rdi \n\t" // rs_c *= sizeof(float) + " \n\t" + "leaq (%%rdi,%%rdi,2), %%r13 \n\t" // r13 = 3*rs_c; + "leaq (%%rcx,%%r13,1), %%rdx \n\t" // rdx = c + 3*rs_c; + "prefetcht0 7 * 8(%%rcx) \n\t" // prefetch c + 0*rs_c + "prefetcht0 7 * 8(%%rcx,%%rdi) \n\t" // prefetch c + 1*rs_c + "prefetcht0 7 * 8(%%rcx,%%rdi,2) \n\t" // prefetch c + 2*rs_c + "prefetcht0 7 * 8(%%rdx) \n\t" // prefetch c + 3*rs_c + "prefetcht0 7 * 8(%%rdx,%%rdi) \n\t" // prefetch c + 4*rs_c + "prefetcht0 7 * 8(%%rdx,%%rdi,2) \n\t" // prefetch c + 5*rs_c + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "movq %0, %%rsi \n\t" // i = k_iter; + "testq %%rsi, %%rsi \n\t" // check i via logical AND. + "je .SCONSIDKLEFT \n\t" // if i == 0, jump to code that + " \n\t" // contains the k_left loop.
+ " \n\t" + " \n\t" + ".SLOOPKITER: \n\t" // MAIN LOOP + " \n\t" + " \n\t" + " \n\t" // iteration 0 + "prefetcht0 64 * 4(%%rax) \n\t" + " \n\t" + "vbroadcastss 0 * 4(%%rax), %%ymm2 \n\t" + "vbroadcastss 1 * 4(%%rax), %%ymm3 \n\t" + "vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t" + "vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t" + "vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t" + "vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t" + " \n\t" + "vbroadcastss 2 * 4(%%rax), %%ymm2 \n\t" + "vbroadcastss 3 * 4(%%rax), %%ymm3 \n\t" + "vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t" + "vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t" + "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" + "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" + " \n\t" + "vbroadcastss 4 * 4(%%rax), %%ymm2 \n\t" + "vbroadcastss 5 * 4(%%rax), %%ymm3 \n\t" + "vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t" + "vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t" + "vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t" + "vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t" + " \n\t" + "vmovaps -2 * 32(%%rbx), %%ymm0 \n\t" + "vmovaps -1 * 32(%%rbx), %%ymm1 \n\t" + " \n\t" + " \n\t" // iteration 1 + "vbroadcastss 6 * 4(%%rax), %%ymm2 \n\t" + "vbroadcastss 7 * 4(%%rax), %%ymm3 \n\t" + "vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t" + "vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t" + "vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t" + "vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t" + " \n\t" + "vbroadcastss 8 * 4(%%rax), %%ymm2 \n\t" + "vbroadcastss 9 * 4(%%rax), %%ymm3 \n\t" + "vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t" + "vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t" + "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" + "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" + " \n\t" + "vbroadcastss 10 * 4(%%rax), %%ymm2 \n\t" + "vbroadcastss 11 * 4(%%rax), %%ymm3 \n\t" + "vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t" + "vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t" + "vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t" + "vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t" + " \n\t" + "vmovaps 0 * 32(%%rbx), %%ymm0 \n\t" + "vmovaps 1 * 32(%%rbx), %%ymm1 \n\t" + " \n\t" + "
\n\t" // iteration 2 + "prefetcht0 76 * 4(%%rax) \n\t" + " \n\t" + "vbroadcastss 12 * 4(%%rax), %%ymm2 \n\t" + "vbroadcastss 13 * 4(%%rax), %%ymm3 \n\t" + "vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t" + "vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t" + "vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t" + "vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t" + " \n\t" + "vbroadcastss 14 * 4(%%rax), %%ymm2 \n\t" + "vbroadcastss 15 * 4(%%rax), %%ymm3 \n\t" + "vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t" + "vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t" + "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" + "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" + " \n\t" + "vbroadcastss 16 * 4(%%rax), %%ymm2 \n\t" + "vbroadcastss 17 * 4(%%rax), %%ymm3 \n\t" + "vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t" + "vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t" + "vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t" + "vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t" + " \n\t" + "vmovaps 2 * 32(%%rbx), %%ymm0 \n\t" + "vmovaps 3 * 32(%%rbx), %%ymm1 \n\t" + " \n\t" + " \n\t" // iteration 3 + "vbroadcastss 18 * 4(%%rax), %%ymm2 \n\t" + "vbroadcastss 19 * 4(%%rax), %%ymm3 \n\t" + "vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t" + "vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t" + "vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t" + "vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t" + " \n\t" + "vbroadcastss 20 * 4(%%rax), %%ymm2 \n\t" + "vbroadcastss 21 * 4(%%rax), %%ymm3 \n\t" + "vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t" + "vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t" + "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" + "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" + " \n\t" + "vbroadcastss 22 * 4(%%rax), %%ymm2 \n\t" + "vbroadcastss 23 * 4(%%rax), %%ymm3 \n\t" + "vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t" + "vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t" + "vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t" + "vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t" + " \n\t" + "addq $4 * 6 * 4, %%rax \n\t" // a += 4*6 (unroll x mr) + "addq $4 * 16 * 4, %%rbx \n\t" // b += 4*16 (unroll x nr) + " \n\t" + "vmovaps -4 * 32(%%rbx), %%ymm0
\n\t" + "vmovaps -3 * 32(%%rbx), %%ymm1 \n\t" + " \n\t" + " \n\t" + "decq %%rsi \n\t" // i -= 1; + "jne .SLOOPKITER \n\t" // iterate again if i != 0. + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + ".SCONSIDKLEFT: \n\t" + " \n\t" + "movq %1, %%rsi \n\t" // i = k_left; + "testq %%rsi, %%rsi \n\t" // check i via logical AND. + "je .SPOSTACCUM \n\t" // if i == 0, skip the edge loop; jump to post-accumulation. + " \n\t" // else, we prepare to enter k_left loop. + " \n\t" + " \n\t" + ".SLOOPKLEFT: \n\t" // EDGE LOOP + " \n\t" + "prefetcht0 16 * 32(%%rax) \n\t" + " \n\t" + "vbroadcastss 0 * 4(%%rax), %%ymm2 \n\t" + "vbroadcastss 1 * 4(%%rax), %%ymm3 \n\t" + "vfmadd231ps %%ymm0, %%ymm2, %%ymm4 \n\t" + "vfmadd231ps %%ymm1, %%ymm2, %%ymm5 \n\t" + "vfmadd231ps %%ymm0, %%ymm3, %%ymm6 \n\t" + "vfmadd231ps %%ymm1, %%ymm3, %%ymm7 \n\t" + " \n\t" + "vbroadcastss 2 * 4(%%rax), %%ymm2 \n\t" + "vbroadcastss 3 * 4(%%rax), %%ymm3 \n\t" + "vfmadd231ps %%ymm0, %%ymm2, %%ymm8 \n\t" + "vfmadd231ps %%ymm1, %%ymm2, %%ymm9 \n\t" + "vfmadd231ps %%ymm0, %%ymm3, %%ymm10 \n\t" + "vfmadd231ps %%ymm1, %%ymm3, %%ymm11 \n\t" + " \n\t" + "vbroadcastss 4 * 4(%%rax), %%ymm2 \n\t" + "vbroadcastss 5 * 4(%%rax), %%ymm3 \n\t" + "vfmadd231ps %%ymm0, %%ymm2, %%ymm12 \n\t" + "vfmadd231ps %%ymm1, %%ymm2, %%ymm13 \n\t" + "vfmadd231ps %%ymm0, %%ymm3, %%ymm14 \n\t" + "vfmadd231ps %%ymm1, %%ymm3, %%ymm15 \n\t" + " \n\t" + "addq $1 * 6 * 4, %%rax \n\t" // a += 1*6 (unroll x mr) + "addq $1 * 16 * 4, %%rbx \n\t" // b += 1*16 (unroll x nr) + " \n\t" + "vmovaps -4 * 32(%%rbx), %%ymm0 \n\t" + "vmovaps -3 * 32(%%rbx), %%ymm1 \n\t" + " \n\t" + " \n\t" + "decq %%rsi \n\t" // i -= 1; + "jne .SLOOPKLEFT \n\t" // iterate again if i != 0.
+ " \n\t" + " \n\t" + " \n\t" + ".SPOSTACCUM: \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "movq %4, %%rax \n\t" // load address of alpha + "movq %5, %%rbx \n\t" // load address of beta + "vbroadcastss (%%rax), %%ymm0 \n\t" // load alpha and duplicate + "vbroadcastss (%%rbx), %%ymm3 \n\t" // load beta and duplicate + " \n\t" + "vmulps %%ymm0, %%ymm4, %%ymm4 \n\t" // scale by alpha + "vmulps %%ymm0, %%ymm5, %%ymm5 \n\t" + "vmulps %%ymm0, %%ymm6, %%ymm6 \n\t" + "vmulps %%ymm0, %%ymm7, %%ymm7 \n\t" + "vmulps %%ymm0, %%ymm8, %%ymm8 \n\t" + "vmulps %%ymm0, %%ymm9, %%ymm9 \n\t" + "vmulps %%ymm0, %%ymm10, %%ymm10 \n\t" + "vmulps %%ymm0, %%ymm11, %%ymm11 \n\t" + "vmulps %%ymm0, %%ymm12, %%ymm12 \n\t" + "vmulps %%ymm0, %%ymm13, %%ymm13 \n\t" + "vmulps %%ymm0, %%ymm14, %%ymm14 \n\t" + "vmulps %%ymm0, %%ymm15, %%ymm15 \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "movq %8, %%rsi \n\t" // load cs_c + "leaq (,%%rsi,4), %%rsi \n\t" // rsi = cs_c * sizeof(float) + " \n\t" + "leaq (%%rcx,%%rsi,8), %%rdx \n\t" // load address of c + 8*cs_c; + " \n\t" + "leaq (%%rsi,%%rsi,2), %%r13 \n\t" // r13 = 3*cs_c; + "leaq (%%rsi,%%rsi,4), %%r15 \n\t" // r15 = 5*cs_c; + "leaq (%%r13,%%rsi,4), %%r10 \n\t" // r10 = 7*cs_c; + " \n\t" + " \n\t" + " \n\t" + " \n\t" // determine if + " \n\t" // c % 32 == 0, AND + " \n\t" // 4*rs_c % 32 == 0, AND + " \n\t" // cs_c == 1 + " \n\t" // ie: aligned, ldim aligned, and + " \n\t" // row-stored + " \n\t" + "cmpq $4, %%rsi \n\t" // set ZF if (4*cs_c) == 4. + "sete %%bl \n\t" // bl = ( ZF == 1 ? 1 : 0 ); + "testq $31, %%rcx \n\t" // set ZF if c & 31 is zero. + "setz %%bh \n\t" // bh = ( ZF == 1 ? 1 : 0 ); + "testq $31, %%rdi \n\t" // set ZF if (4*rs_c) & 31 is zero. + "setz %%al \n\t" // al = ( ZF == 1 ? 1 : 0 ); + " \n\t" // and(bl,bh) followed by + " \n\t" // and(bh,al) will reveal result + " \n\t" + " \n\t" // now avoid loading C if beta == 0 + " \n\t" + "vxorps %%ymm0, %%ymm0, %%ymm0 \n\t" // set ymm0 to zero.
+ "vucomiss %%xmm0, %%xmm3 \n\t" // set ZF if beta == 0. + "je .SBETAZERO \n\t" // if ZF = 1, jump to beta == 0 case + " \n\t" + " \n\t" + " \n\t" // check if aligned/row-stored + "andb %%bl, %%bh \n\t" // bh = bl & bh; ZF is set iff the result is 0. + "andb %%bh, %%al \n\t" // al = bh & al; ZF is set iff the result is 0. + "jne .SROWSTORED \n\t" // if all three flags were 1 (ZF clear), jump to row storage case + " \n\t" + " \n\t" + " \n\t" + ".SGENSTORED: \n\t" + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm4, %%ymm3, %%ymm0 \n\t" // ymm0 = beta * (loaded c) + alpha-scaled accumulator + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm6, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm8, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm10, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm12, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm14, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "movq %%rdx, %%rcx \n\t" // rcx = c + 8*cs_c + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm5, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm7, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm9, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm11, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx
\n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm13, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + SGEMM_INPUT_GS_BETA_NZ + "vfmadd213ps %%ymm15, %%ymm3, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + " \n\t" + "jmp .SDONE \n\t" // jump to end. + " \n\t" + " \n\t" + " \n\t" + ".SROWSTORED: \n\t" + " \n\t" + " \n\t" + "vmovaps (%%rcx), %%ymm0 \n\t" + "vfmadd213ps %%ymm4, %%ymm3, %%ymm0 \n\t" + "vmovaps %%ymm0, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps (%%rdx), %%ymm1 \n\t" + "vfmadd213ps %%ymm5, %%ymm3, %%ymm1 \n\t" + "vmovaps %%ymm1, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps (%%rcx), %%ymm0 \n\t" + "vfmadd213ps %%ymm6, %%ymm3, %%ymm0 \n\t" + "vmovaps %%ymm0, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps (%%rdx), %%ymm1 \n\t" + "vfmadd213ps %%ymm7, %%ymm3, %%ymm1 \n\t" + "vmovaps %%ymm1, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps (%%rcx), %%ymm0 \n\t" + "vfmadd213ps %%ymm8, %%ymm3, %%ymm0 \n\t" + "vmovaps %%ymm0, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps (%%rdx), %%ymm1 \n\t" + "vfmadd213ps %%ymm9, %%ymm3, %%ymm1 \n\t" + "vmovaps %%ymm1, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps (%%rcx), %%ymm0 \n\t" + "vfmadd213ps %%ymm10, %%ymm3, %%ymm0 \n\t" + "vmovaps %%ymm0, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps (%%rdx), %%ymm1 \n\t" + "vfmadd213ps %%ymm11, %%ymm3, %%ymm1 \n\t" + "vmovaps %%ymm1, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps (%%rcx), %%ymm0 \n\t" + "vfmadd213ps %%ymm12, %%ymm3, %%ymm0 \n\t" + "vmovaps %%ymm0, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps (%%rdx), %%ymm1 \n\t" + "vfmadd213ps %%ymm13, %%ymm3, %%ymm1 \n\t" + "vmovaps %%ymm1, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps (%%rcx), %%ymm0 \n\t" + "vfmadd213ps
%%ymm14, %%ymm3, %%ymm0 \n\t" + "vmovaps %%ymm0, (%%rcx) \n\t" + //"addq %%rdi, %%rcx \n\t" + "vmovaps (%%rdx), %%ymm1 \n\t" + "vfmadd213ps %%ymm15, %%ymm3, %%ymm1 \n\t" + "vmovaps %%ymm1, (%%rdx) \n\t" + //"addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + " \n\t" + "jmp .SDONE \n\t" // jump to end. + " \n\t" + " \n\t" + " \n\t" + ".SBETAZERO: \n\t" + " \n\t" // check if aligned/row-stored + "andb %%bl, %%bh \n\t" // bh = bl & bh; ZF is set iff the result is 0. + "andb %%bh, %%al \n\t" // al = bh & al; ZF is set iff the result is 0. + "jne .SROWSTORBZ \n\t" // if all three flags were 1 (ZF clear), jump to row storage case + " \n\t" + " \n\t" + " \n\t" + ".SGENSTORBZ: \n\t" + " \n\t" + " \n\t" + "vmovaps %%ymm4, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm6, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm8, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm10, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm12, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm14, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "movq %%rdx, %%rcx \n\t" // rcx = c + 8*cs_c + " \n\t" + " \n\t" + "vmovaps %%ymm5, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm7, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm9, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm11, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm13, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + "
\n\t" + "vmovaps %%ymm15, %%ymm0 \n\t" + SGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + " \n\t" + "jmp .SDONE \n\t" // jump to end. + " \n\t" + " \n\t" + " \n\t" + ".SROWSTORBZ: \n\t" + " \n\t" + " \n\t" + "vmovaps %%ymm4, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps %%ymm5, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + "vmovaps %%ymm6, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps %%ymm7, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps %%ymm8, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps %%ymm9, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps %%ymm10, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps %%ymm11, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps %%ymm12, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps %%ymm13, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps %%ymm14, (%%rcx) \n\t" + //"addq %%rdi, %%rcx \n\t" + "vmovaps %%ymm15, (%%rdx) \n\t" + //"addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + ".SDONE: \n\t" + " \n\t" + + : // output operands (none) + : // input operands + "m" (k_iter), // 0 + "m" (k_left), // 1 + "m" (a), // 2 + "m" (b), // 3 + "m" (alpha), // 4 + "m" (beta), // 5 + "m" (c), // 6 + "m" (rs_c), // 7 + "m" (cs_c)/*, // 8 + "m" (b_next), // 9 + "m" (a_next)*/ // 10 + : // register clobber list + "rax", "rbx", "rcx", "rdx", "rsi", "rdi", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "xmm0", "xmm1", "xmm2", "xmm3", + "xmm4", "xmm5", "xmm6", "xmm7", + "xmm8", "xmm9", "xmm10", "xmm11", + "xmm12", "xmm13", "xmm14", "xmm15", + "memory" + ); +} + + +#define DGEMM_INPUT_GS_BETA_NZ \ + "vmovlpd (%%rcx ), %%xmm0, %%xmm0 \n\t" \ + "vmovhpd (%%rcx,%%rsi,1), %%xmm0, %%xmm0 \n\t" \ + "vmovlpd (%%rcx,%%rsi,2), %%xmm1, %%xmm1 \n\t" \ + "vmovhpd (%%rcx,%%r13 ), %%xmm1, %%xmm1 \n\t" \ + "vperm2f128 $0x20, %%ymm1, %%ymm0, %%ymm0
\n\t" /*\ + "vmovlps (%%rcx,%%rsi,4), %%xmm2, %%xmm2 \n\t" \ + "vmovhps (%%rcx,%%r15 ), %%xmm2, %%xmm2 \n\t" \ + "vmovlps (%%rcx,%%r13,2), %%xmm1, %%xmm1 \n\t" \ + "vmovhps (%%rcx,%%r10 ), %%xmm1, %%xmm1 \n\t" \ + "vperm2f128 $0x20, %%ymm1, %%ymm2, %%ymm2 \n\t"*/ + +#define DGEMM_OUTPUT_GS_BETA_NZ \ + "vextractf128 $1, %%ymm0, %%xmm1 \n\t" \ + "vmovlpd %%xmm0, (%%rcx ) \n\t" \ + "vmovhpd %%xmm0, (%%rcx,%%rsi ) \n\t" \ + "vmovlpd %%xmm1, (%%rcx,%%rsi,2) \n\t" \ + "vmovhpd %%xmm1, (%%rcx,%%r13 ) \n\t" /*\ + "vextractf128 $1, %%ymm2, %%xmm1 \n\t" \ + "vmovlpd %%xmm2, (%%rcx,%%rsi,4) \n\t" \ + "vmovhpd %%xmm2, (%%rcx,%%r15 ) \n\t" \ + "vmovlpd %%xmm1, (%%rcx,%%r13,2) \n\t" \ + "vmovhpd %%xmm1, (%%rcx,%%r10 ) \n\t"*/ + +void bli_dgemm_asm_6x8 + ( + dim_t k, + double* restrict alpha, + double* restrict a, + double* restrict b, + double* restrict beta, + double* restrict c, inc_t rs_c, inc_t cs_c, + auxinfo_t* restrict data, + cntx_t* restrict cntx + ) +{ + //void* a_next = bli_auxinfo_next_a( data ); + //void* b_next = bli_auxinfo_next_b( data ); + + dim_t k_iter = k / 4; + dim_t k_left = k % 4; + + __asm__ volatile + ( + " \n\t" + "vzeroall \n\t" // zero all xmm/ymm registers. + " \n\t" + " \n\t" + "movq %2, %%rax \n\t" // load address of a. + "movq %3, %%rbx \n\t" // load address of b. + //"movq %9, %%r15 \n\t" // load address of b_next. 
+ " \n\t" + "addq $32 * 4, %%rbx \n\t" + " \n\t" // initialize loop by pre-loading + "vmovaps -4 * 32(%%rbx), %%ymm0 \n\t" + "vmovaps -3 * 32(%%rbx), %%ymm1 \n\t" + " \n\t" + "movq %6, %%rcx \n\t" // load address of c + "movq %7, %%rdi \n\t" // load rs_c + "leaq (,%%rdi,8), %%rdi \n\t" // rs_c *= sizeof(double) + " \n\t" + "leaq (%%rdi,%%rdi,2), %%r13 \n\t" // r13 = 3*rs_c; + "leaq (%%rcx,%%r13,1), %%rdx \n\t" // rdx = c + 3*rs_c; + "prefetcht0 7 * 8(%%rcx) \n\t" // prefetch c + 0*rs_c + "prefetcht0 7 * 8(%%rcx,%%rdi) \n\t" // prefetch c + 1*rs_c + "prefetcht0 7 * 8(%%rcx,%%rdi,2) \n\t" // prefetch c + 2*rs_c + "prefetcht0 7 * 8(%%rdx) \n\t" // prefetch c + 3*rs_c + "prefetcht0 7 * 8(%%rdx,%%rdi) \n\t" // prefetch c + 4*rs_c + "prefetcht0 7 * 8(%%rdx,%%rdi,2) \n\t" // prefetch c + 5*rs_c + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "movq %0, %%rsi \n\t" // i = k_iter; + "testq %%rsi, %%rsi \n\t" // check i via logical AND. + "je .DCONSIDKLEFT \n\t" // if i == 0, jump to code that + " \n\t" // contains the k_left loop. 
+ " \n\t" + " \n\t" + ".DLOOPKITER: \n\t" // MAIN LOOP + " \n\t" + " \n\t" + " \n\t" // iteration 0 + "prefetcht0 64 * 8(%%rax) \n\t" + " \n\t" + "vbroadcastsd 0 * 8(%%rax), %%ymm2 \n\t" + "vbroadcastsd 1 * 8(%%rax), %%ymm3 \n\t" + "vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t" + "vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t" + "vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t" + "vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t" + " \n\t" + "vbroadcastsd 2 * 8(%%rax), %%ymm2 \n\t" + "vbroadcastsd 3 * 8(%%rax), %%ymm3 \n\t" + "vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t" + "vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t" + "vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t" + "vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t" + " \n\t" + "vbroadcastsd 4 * 8(%%rax), %%ymm2 \n\t" + "vbroadcastsd 5 * 8(%%rax), %%ymm3 \n\t" + "vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t" + "vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t" + "vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t" + "vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t" + " \n\t" + "vmovaps -2 * 32(%%rbx), %%ymm0 \n\t" + "vmovaps -1 * 32(%%rbx), %%ymm1 \n\t" + " \n\t" + " \n\t" // iteration 1 + "vbroadcastsd 6 * 8(%%rax), %%ymm2 \n\t" + "vbroadcastsd 7 * 8(%%rax), %%ymm3 \n\t" + "vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t" + "vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t" + "vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t" + "vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t" + " \n\t" + "vbroadcastsd 8 * 8(%%rax), %%ymm2 \n\t" + "vbroadcastsd 9 * 8(%%rax), %%ymm3 \n\t" + "vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t" + "vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t" + "vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t" + "vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t" + " \n\t" + "vbroadcastsd 10 * 8(%%rax), %%ymm2 \n\t" + "vbroadcastsd 11 * 8(%%rax), %%ymm3 \n\t" + "vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t" + "vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t" + "vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t" + "vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t" + " \n\t" + "vmovaps 0 * 32(%%rbx), %%ymm0 \n\t" + "vmovaps 1 * 32(%%rbx), %%ymm1 \n\t" + " \n\t" + " 
\n\t" // iteration 2 + "prefetcht0 76 * 8(%%rax) \n\t" + " \n\t" + "vbroadcastsd 12 * 8(%%rax), %%ymm2 \n\t" + "vbroadcastsd 13 * 8(%%rax), %%ymm3 \n\t" + "vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t" + "vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t" + "vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t" + "vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t" + " \n\t" + "vbroadcastsd 14 * 8(%%rax), %%ymm2 \n\t" + "vbroadcastsd 15 * 8(%%rax), %%ymm3 \n\t" + "vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t" + "vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t" + "vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t" + "vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t" + " \n\t" + "vbroadcastsd 16 * 8(%%rax), %%ymm2 \n\t" + "vbroadcastsd 17 * 8(%%rax), %%ymm3 \n\t" + "vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t" + "vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t" + "vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t" + "vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t" + " \n\t" + "vmovaps 2 * 32(%%rbx), %%ymm0 \n\t" + "vmovaps 3 * 32(%%rbx), %%ymm1 \n\t" + " \n\t" + " \n\t" // iteration 3 + "vbroadcastsd 18 * 8(%%rax), %%ymm2 \n\t" + "vbroadcastsd 19 * 8(%%rax), %%ymm3 \n\t" + "vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t" + "vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t" + "vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t" + "vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t" + " \n\t" + "vbroadcastsd 20 * 8(%%rax), %%ymm2 \n\t" + "vbroadcastsd 21 * 8(%%rax), %%ymm3 \n\t" + "vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t" + "vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t" + "vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t" + "vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t" + " \n\t" + "vbroadcastsd 22 * 8(%%rax), %%ymm2 \n\t" + "vbroadcastsd 23 * 8(%%rax), %%ymm3 \n\t" + "vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t" + "vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t" + "vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t" + "vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t" + " \n\t" + "addq $4 * 6 * 8, %%rax \n\t" // a += 4*6 (unroll x mr) + "addq $4 * 8 * 8, %%rbx \n\t" // b += 4*8 (unroll x nr) + " \n\t" + "vmovaps -4 * 32(%%rbx), %%ymm0 \n\t" 
+ "vmovaps -3 * 32(%%rbx), %%ymm1 \n\t" + " \n\t" + " \n\t" + "decq %%rsi \n\t" // i -= 1; + "jne .DLOOPKITER \n\t" // iterate again if i != 0. + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + ".DCONSIDKLEFT: \n\t" + " \n\t" + "movq %1, %%rsi \n\t" // i = k_left; + "testq %%rsi, %%rsi \n\t" // check i via logical AND. + "je .DPOSTACCUM \n\t" // if i == 0, we're done; jump to end. + " \n\t" // else, we prepare to enter k_left loop. + " \n\t" + " \n\t" + ".DLOOPKLEFT: \n\t" // EDGE LOOP + " \n\t" + "prefetcht0 64 * 8(%%rax) \n\t" + " \n\t" + "vbroadcastsd 0 * 8(%%rax), %%ymm2 \n\t" + "vbroadcastsd 1 * 8(%%rax), %%ymm3 \n\t" + "vfmadd231pd %%ymm0, %%ymm2, %%ymm4 \n\t" + "vfmadd231pd %%ymm1, %%ymm2, %%ymm5 \n\t" + "vfmadd231pd %%ymm0, %%ymm3, %%ymm6 \n\t" + "vfmadd231pd %%ymm1, %%ymm3, %%ymm7 \n\t" + " \n\t" + "vbroadcastsd 2 * 8(%%rax), %%ymm2 \n\t" + "vbroadcastsd 3 * 8(%%rax), %%ymm3 \n\t" + "vfmadd231pd %%ymm0, %%ymm2, %%ymm8 \n\t" + "vfmadd231pd %%ymm1, %%ymm2, %%ymm9 \n\t" + "vfmadd231pd %%ymm0, %%ymm3, %%ymm10 \n\t" + "vfmadd231pd %%ymm1, %%ymm3, %%ymm11 \n\t" + " \n\t" + "vbroadcastsd 4 * 8(%%rax), %%ymm2 \n\t" + "vbroadcastsd 5 * 8(%%rax), %%ymm3 \n\t" + "vfmadd231pd %%ymm0, %%ymm2, %%ymm12 \n\t" + "vfmadd231pd %%ymm1, %%ymm2, %%ymm13 \n\t" + "vfmadd231pd %%ymm0, %%ymm3, %%ymm14 \n\t" + "vfmadd231pd %%ymm1, %%ymm3, %%ymm15 \n\t" + " \n\t" + "addq $1 * 6 * 8, %%rax \n\t" // a += 1*6 (unroll x mr) + "addq $1 * 8 * 8, %%rbx \n\t" // b += 1*8 (unroll x nr) + " \n\t" + "vmovaps -4 * 32(%%rbx), %%ymm0 \n\t" + "vmovaps -3 * 32(%%rbx), %%ymm1 \n\t" + " \n\t" + " \n\t" + "decq %%rsi \n\t" // i -= 1; + "jne .DLOOPKLEFT \n\t" // iterate again if i != 0. 
+ " \n\t" + " \n\t" + " \n\t" + ".DPOSTACCUM: \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "movq %4, %%rax \n\t" // load address of alpha + "movq %5, %%rbx \n\t" // load address of beta + "vbroadcastsd (%%rax), %%ymm0 \n\t" // load alpha and duplicate + "vbroadcastsd (%%rbx), %%ymm3 \n\t" // load beta and duplicate + " \n\t" + "vmulpd %%ymm0, %%ymm4, %%ymm4 \n\t" // scale by alpha + "vmulpd %%ymm0, %%ymm5, %%ymm5 \n\t" + "vmulpd %%ymm0, %%ymm6, %%ymm6 \n\t" + "vmulpd %%ymm0, %%ymm7, %%ymm7 \n\t" + "vmulpd %%ymm0, %%ymm8, %%ymm8 \n\t" + "vmulpd %%ymm0, %%ymm9, %%ymm9 \n\t" + "vmulpd %%ymm0, %%ymm10, %%ymm10 \n\t" + "vmulpd %%ymm0, %%ymm11, %%ymm11 \n\t" + "vmulpd %%ymm0, %%ymm12, %%ymm12 \n\t" + "vmulpd %%ymm0, %%ymm13, %%ymm13 \n\t" + "vmulpd %%ymm0, %%ymm14, %%ymm14 \n\t" + "vmulpd %%ymm0, %%ymm15, %%ymm15 \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + "movq %8, %%rsi \n\t" // load cs_c + "leaq (,%%rsi,8), %%rsi \n\t" // rsi = cs_c * sizeof(double) + " \n\t" + "leaq (%%rcx,%%rsi,4), %%rdx \n\t" // load address of c + 4*cs_c; + " \n\t" + "leaq (%%rsi,%%rsi,2), %%r13 \n\t" // r13 = 3*cs_c; + //"leaq (%%rsi,%%rsi,4), %%r15 \n\t" // r15 = 5*cs_c; + //"leaq (%%r13,%%rsi,4), %%r10 \n\t" // r10 = 7*cs_c; + " \n\t" + " \n\t" + " \n\t" + " \n\t" // determine if + " \n\t" // c % 32 == 0, AND + " \n\t" // 8*rs_c % 32 == 0, AND + " \n\t" // cs_c == 1 + " \n\t" // ie: aligned, ldim aligned, and + " \n\t" // row-stored + " \n\t" + "cmpq $8, %%rsi \n\t" // set ZF if (8*cs_c) == 8. + "sete %%bl \n\t" // bl = ( ZF == 1 ? 1 : 0 ); + "testq $31, %%rcx \n\t" // set ZF if c & 32 is zero. + "setz %%bh \n\t" // bh = ( ZF == 0 ? 1 : 0 ); + "testq $31, %%rdi \n\t" // set ZF if (8*rs_c) & 32 is zero. + "setz %%al \n\t" // al = ( ZF == 0 ? 1 : 0 ); + " \n\t" // and(bl,bh) followed by + " \n\t" // and(bh,al) will reveal result + " \n\t" + " \n\t" // now avoid loading C if beta == 0 + " \n\t" + "vxorpd %%ymm0, %%ymm0, %%ymm0 \n\t" // set ymm0 to zero. 
+ "vucomisd %%xmm0, %%xmm3 \n\t" // set ZF if beta == 0. + "je .DBETAZERO \n\t" // if ZF = 1, jump to beta == 0 case + " \n\t" + " \n\t" + " \n\t" // check if aligned/row-stored + "andb %%bl, %%bh \n\t" // set ZF if bl & bh == 1. + "andb %%bh, %%al \n\t" // set ZF if bh & al == 1. + "jne .DROWSTORED \n\t" // jump to row storage case + " \n\t" + " \n\t" + " \n\t" + ".DGENSTORED: \n\t" + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm4, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm6, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm8, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm10, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm12, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm14, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + " \n\t" + " \n\t" + "movq %%rdx, %%rcx \n\t" // rcx = c + 4*cs_c + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm5, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm7, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm9, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm11, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + 
DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm13, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + DGEMM_INPUT_GS_BETA_NZ + "vfmadd213pd %%ymm15, %%ymm3, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + " \n\t" + " \n\t" + " \n\t" + "jmp .DDONE \n\t" // jump to end. + " \n\t" + " \n\t" + " \n\t" + ".DROWSTORED: \n\t" + " \n\t" + " \n\t" + "vmovaps (%%rcx), %%ymm0 \n\t" + "vfmadd213pd %%ymm4, %%ymm3, %%ymm0 \n\t" + "vmovaps %%ymm0, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps (%%rdx), %%ymm1 \n\t" + "vfmadd213pd %%ymm5, %%ymm3, %%ymm1 \n\t" + "vmovaps %%ymm1, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps (%%rcx), %%ymm0 \n\t" + "vfmadd213pd %%ymm6, %%ymm3, %%ymm0 \n\t" + "vmovaps %%ymm0, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps (%%rdx), %%ymm1 \n\t" + "vfmadd213pd %%ymm7, %%ymm3, %%ymm1 \n\t" + "vmovaps %%ymm1, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps (%%rcx), %%ymm0 \n\t" + "vfmadd213pd %%ymm8, %%ymm3, %%ymm0 \n\t" + "vmovaps %%ymm0, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps (%%rdx), %%ymm1 \n\t" + "vfmadd213pd %%ymm9, %%ymm3, %%ymm1 \n\t" + "vmovaps %%ymm1, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps (%%rcx), %%ymm0 \n\t" + "vfmadd213pd %%ymm10, %%ymm3, %%ymm0 \n\t" + "vmovaps %%ymm0, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps (%%rdx), %%ymm1 \n\t" + "vfmadd213pd %%ymm11, %%ymm3, %%ymm1 \n\t" + "vmovaps %%ymm1, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps (%%rcx), %%ymm0 \n\t" + "vfmadd213pd %%ymm12, %%ymm3, %%ymm0 \n\t" + "vmovaps %%ymm0, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps (%%rdx), %%ymm1 \n\t" + "vfmadd213pd %%ymm13, %%ymm3, %%ymm1 \n\t" + "vmovaps %%ymm1, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps (%%rcx), %%ymm0 \n\t" + "vfmadd213pd %%ymm14, %%ymm3, %%ymm0 \n\t" + "vmovaps %%ymm0, (%%rcx) \n\t" + //"addq %%rdi, %%rcx 
\n\t" + "vmovaps (%%rdx), %%ymm1 \n\t" + "vfmadd213pd %%ymm15, %%ymm3, %%ymm1 \n\t" + "vmovaps %%ymm1, (%%rdx) \n\t" + //"addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + " \n\t" + "jmp .DDONE \n\t" // jump to end. + " \n\t" + " \n\t" + " \n\t" + ".DBETAZERO: \n\t" + " \n\t" // check if aligned/row-stored + "andb %%bl, %%bh \n\t" // set ZF if bl & bh == 1. + "andb %%bh, %%al \n\t" // set ZF if bh & al == 1. + "jne .DROWSTORBZ \n\t" // jump to row storage case + " \n\t" + " \n\t" + " \n\t" + ".DGENSTORBZ: \n\t" + " \n\t" + " \n\t" + "vmovaps %%ymm4, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm6, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm8, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm10, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm12, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm14, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + " \n\t" + " \n\t" + "movq %%rdx, %%rcx \n\t" // rcx = c + 4*cs_c + " \n\t" + " \n\t" + "vmovaps %%ymm5, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm7, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm9, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm11, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm13, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + "addq %%rdi, %%rcx \n\t" // c += rs_c; + " \n\t" + " \n\t" + "vmovaps %%ymm15, %%ymm0 \n\t" + DGEMM_OUTPUT_GS_BETA_NZ + " \n\t" + " \n\t" + " \n\t" + "jmp .DDONE \n\t" // jump to end. 
+ " \n\t" + " \n\t" + " \n\t" + ".DROWSTORBZ: \n\t" + " \n\t" + " \n\t" + "vmovaps %%ymm4, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps %%ymm5, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + "vmovaps %%ymm6, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps %%ymm7, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps %%ymm8, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps %%ymm9, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps %%ymm10, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps %%ymm11, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps %%ymm12, (%%rcx) \n\t" + "addq %%rdi, %%rcx \n\t" + "vmovaps %%ymm13, (%%rdx) \n\t" + "addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + "vmovaps %%ymm14, (%%rcx) \n\t" + //"addq %%rdi, %%rcx \n\t" + "vmovaps %%ymm15, (%%rdx) \n\t" + //"addq %%rdi, %%rdx \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + " \n\t" + ".DDONE: \n\t" + " \n\t" + + : // output operands (none) + : // input operands + "m" (k_iter), // 0 + "m" (k_left), // 1 + "m" (a), // 2 + "m" (b), // 3 + "m" (alpha), // 4 + "m" (beta), // 5 + "m" (c), // 6 + "m" (rs_c), // 7 + "m" (cs_c)/*, // 8 + "m" (b_next), // 9 + "m" (a_next)*/ // 10 + : // register clobber list + "rax", "rbx", "rcx", "rdx", "rsi", "rdi", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "xmm0", "xmm1", "xmm2", "xmm3", + "xmm4", "xmm5", "xmm6", "xmm7", + "xmm8", "xmm9", "xmm10", "xmm11", + "xmm12", "xmm13", "xmm14", "xmm15", + "memory" + ); +} + +#if 0 + +void bli_cgemm_asm_ + ( + dim_t k, + scomplex* restrict alpha, + scomplex* restrict a, + scomplex* restrict b, + scomplex* restrict beta, + scomplex* restrict c, inc_t rs_c, inc_t cs_c, + auxinfo_t* restrict data, + cntx_t* restrict cntx + ) +{ + //void* a_next = bli_auxinfo_next_a( data ); + //void* b_next = bli_auxinfo_next_b( data ); + + //dim_t k_iter = k / 4; + //dim_t k_left = k % 4; + +} + + + +void bli_zgemm_asm_ + ( + dim_t k, + dcomplex* 
restrict alpha, + dcomplex* restrict a, + dcomplex* restrict b, + dcomplex* restrict beta, + dcomplex* restrict c, inc_t rs_c, inc_t cs_c, + auxinfo_t* restrict data, + cntx_t* restrict cntx + ) +{ + //void* a_next = bli_auxinfo_next_a( data ); + //void* b_next = bli_auxinfo_next_b( data ); + + //dim_t k_iter = k / 4; + //dim_t k_left = k % 4; + +} + +#endif diff --git a/kernels/x86_64/haswell/3/bli_gemm_asm_d8x6.c b/kernels/x86_64/haswell/3/bli_gemm_asm_d8x6.c index 0a49f8989..caf16305b 100644 --- a/kernels/x86_64/haswell/3/bli_gemm_asm_d8x6.c +++ b/kernels/x86_64/haswell/3/bli_gemm_asm_d8x6.c @@ -124,7 +124,7 @@ void bli_sgemm_asm_16x6 " \n\t" " \n\t" " \n\t" // iteration 0 - "prefetcht0 16 * 32(%%rax) \n\t" + "prefetcht0 128 * 4(%%rax) \n\t" " \n\t" "vbroadcastss 0 * 4(%%rbx), %%ymm2 \n\t" "vbroadcastss 1 * 4(%%rbx), %%ymm3 \n\t" @@ -150,8 +150,6 @@ void bli_sgemm_asm_16x6 "vmovaps -2 * 32(%%rax), %%ymm0 \n\t" "vmovaps -1 * 32(%%rax), %%ymm1 \n\t" " \n\t" - " \n\t" - " \n\t" " \n\t" // iteration 1 "vbroadcastss 6 * 4(%%rbx), %%ymm2 \n\t" "vbroadcastss 7 * 4(%%rbx), %%ymm3 \n\t" @@ -177,10 +175,8 @@ void bli_sgemm_asm_16x6 "vmovaps 0 * 32(%%rax), %%ymm0 \n\t" "vmovaps 1 * 32(%%rax), %%ymm1 \n\t" " \n\t" - " \n\t" - " \n\t" " \n\t" // iteration 2 - "prefetcht0 20 * 32(%%rax) \n\t" + "prefetcht0 160 * 4(%%rax) \n\t" " \n\t" "vbroadcastss 12 * 4(%%rbx), %%ymm2 \n\t" "vbroadcastss 13 * 4(%%rbx), %%ymm3 \n\t" @@ -206,8 +202,6 @@ void bli_sgemm_asm_16x6 "vmovaps 2 * 32(%%rax), %%ymm0 \n\t" "vmovaps 3 * 32(%%rax), %%ymm1 \n\t" " \n\t" - " \n\t" - " \n\t" " \n\t" // iteration 3 "vbroadcastss 18 * 4(%%rbx), %%ymm2 \n\t" "vbroadcastss 19 * 4(%%rbx), %%ymm3 \n\t" @@ -255,7 +249,7 @@ void bli_sgemm_asm_16x6 " \n\t" ".SLOOPKLEFT: \n\t" // EDGE LOOP " \n\t" - "prefetcht0 16 * 32(%%rax) \n\t" + "prefetcht0 128 * 4(%%rax) \n\t" " \n\t" "vbroadcastss 0 * 4(%%rbx), %%ymm2 \n\t" "vbroadcastss 1 * 4(%%rbx), %%ymm3 \n\t" @@ -395,6 +389,7 @@ void bli_sgemm_asm_16x6 
SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm14, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "movq %%rdx, %%rcx \n\t" // rcx = c + 8*rs_c @@ -433,6 +428,7 @@ void bli_sgemm_asm_16x6 SGEMM_INPUT_GS_BETA_NZ "vfmadd213ps %%ymm15, %%ymm3, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" " \n\t" @@ -496,11 +492,11 @@ void bli_sgemm_asm_16x6 "vmovaps (%%rcx), %%ymm0 \n\t" "vfmadd213ps %%ymm14, %%ymm3, %%ymm0 \n\t" "vmovaps %%ymm0, (%%rcx) \n\t" - "addq %%rdi, %%rcx \n\t" + //"addq %%rdi, %%rcx \n\t" "vmovaps (%%rdx), %%ymm1 \n\t" "vfmadd213ps %%ymm15, %%ymm3, %%ymm1 \n\t" "vmovaps %%ymm1, (%%rdx) \n\t" - "addq %%rdi, %%rdx \n\t" + //"addq %%rdi, %%rdx \n\t" " \n\t" " \n\t" " \n\t" @@ -546,6 +542,7 @@ void bli_sgemm_asm_16x6 " \n\t" "vmovaps %%ymm14, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "movq %%rdx, %%rcx \n\t" // rcx = c + 8*rs_c @@ -578,6 +575,7 @@ void bli_sgemm_asm_16x6 " \n\t" "vmovaps %%ymm15, %%ymm0 \n\t" SGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" " \n\t" @@ -618,8 +616,9 @@ void bli_sgemm_asm_16x6 " \n\t" " \n\t" "vmovaps %%ymm14, (%%rcx) \n\t" - " \n\t" + //"addq %%rdi, %%rcx \n\t" "vmovaps %%ymm15, (%%rdx) \n\t" + //"addq %%rdi, %%rdx \n\t" " \n\t" " \n\t" " \n\t" @@ -737,7 +736,7 @@ void bli_dgemm_asm_8x6 " \n\t" " \n\t" " \n\t" // iteration 0 - "prefetcht0 16 * 32(%%rax) \n\t" + "prefetcht0 64 * 8(%%rax) \n\t" " \n\t" "vbroadcastsd 0 * 8(%%rbx), %%ymm2 \n\t" "vbroadcastsd 1 * 8(%%rbx), %%ymm3 \n\t" @@ -763,8 +762,6 @@ void bli_dgemm_asm_8x6 "vmovaps -2 * 32(%%rax), %%ymm0 \n\t" "vmovaps -1 * 32(%%rax), %%ymm1 \n\t" " \n\t" - " \n\t" - " \n\t" " \n\t" // iteration 1 "vbroadcastsd 6 * 8(%%rbx), %%ymm2 \n\t" "vbroadcastsd 7 * 8(%%rbx), %%ymm3 \n\t" @@ -790,10 +787,8 @@ void bli_dgemm_asm_8x6 "vmovaps 0 * 32(%%rax), %%ymm0 \n\t" "vmovaps 1 * 32(%%rax), %%ymm1 \n\t" 
" \n\t" - " \n\t" - " \n\t" " \n\t" // iteration 2 - "prefetcht0 20 * 32(%%rax) \n\t" + "prefetcht0 76 * 8(%%rax) \n\t" " \n\t" "vbroadcastsd 12 * 8(%%rbx), %%ymm2 \n\t" "vbroadcastsd 13 * 8(%%rbx), %%ymm3 \n\t" @@ -819,8 +814,6 @@ void bli_dgemm_asm_8x6 "vmovaps 2 * 32(%%rax), %%ymm0 \n\t" "vmovaps 3 * 32(%%rax), %%ymm1 \n\t" " \n\t" - " \n\t" - " \n\t" " \n\t" // iteration 3 "vbroadcastsd 18 * 8(%%rbx), %%ymm2 \n\t" "vbroadcastsd 19 * 8(%%rbx), %%ymm3 \n\t" @@ -868,7 +861,7 @@ void bli_dgemm_asm_8x6 " \n\t" ".DLOOPKLEFT: \n\t" // EDGE LOOP " \n\t" - "prefetcht0 16 * 32(%%rax) \n\t" + "prefetcht0 64 * 8(%%rax) \n\t" " \n\t" "vbroadcastsd 0 * 8(%%rbx), %%ymm2 \n\t" "vbroadcastsd 1 * 8(%%rbx), %%ymm3 \n\t" @@ -1008,6 +1001,7 @@ void bli_dgemm_asm_8x6 DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm14, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "movq %%rdx, %%rcx \n\t" // rcx = c + 4*rs_c @@ -1046,6 +1040,7 @@ void bli_dgemm_asm_8x6 DGEMM_INPUT_GS_BETA_NZ "vfmadd213pd %%ymm15, %%ymm3, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" " \n\t" @@ -1159,6 +1154,7 @@ void bli_dgemm_asm_8x6 " \n\t" "vmovaps %%ymm14, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" "movq %%rdx, %%rcx \n\t" // rcx = c + 4*rs_c @@ -1191,6 +1187,7 @@ void bli_dgemm_asm_8x6 " \n\t" "vmovaps %%ymm15, %%ymm0 \n\t" DGEMM_OUTPUT_GS_BETA_NZ + //"addq %%rdi, %%rcx \n\t" // c += cs_c; " \n\t" " \n\t" " \n\t" @@ -1231,8 +1228,9 @@ void bli_dgemm_asm_8x6 " \n\t" " \n\t" "vmovaps %%ymm14, (%%rcx) \n\t" - " \n\t" + //"addq %%rdi, %%rcx \n\t" "vmovaps %%ymm15, (%%rdx) \n\t" + //"addq %%rdi, %%rdx \n\t" " \n\t" " \n\t" " \n\t" diff --git a/test/3m4m/Makefile b/test/3m4m/Makefile index 66eb8dec4..9e982032f 100644 --- a/test/3m4m/Makefile +++ b/test/3m4m/Makefile @@ -55,15 +55,11 @@ # --- Makefile initialization -------------------------------------------------- # 
-# Define the name of the configuration file. -CONFIG_MK_FILE := config.mk +# Define the name of the common makefile fragment. +COMMON_MK_FILE := common.mk -# Define the name of the file containing build and architecture-specific -# makefile definitions. -MAKE_DEFS_FILE := make_defs.mk - -# Locations of important files. -ROOT_PATH := ../.. +# Important locations and directory names. +RELPATH := ../.. CONFIG_DIR := config @@ -74,43 +70,26 @@ CONFIG_DIR := config # Construct the path to the makefile configuration file that was generated by # the configure script. -CONFIG_MK_PATH := $(ROOT_PATH)/$(CONFIG_MK_FILE) +COMMON_MK_PATH := $(RELPATH)/$(COMMON_MK_FILE) -# Include the configuration file. --include $(CONFIG_MK_PATH) +# Include the common makefile fragment. +-include $(COMMON_MK_PATH) -# Detect whether we actually got the configuration file. If we didn't, then -# it is likely that the user has not yet generated it (via configure). -ifeq ($(strip $(CONFIG_MK_INCLUDED)),yes) -CONFIG_MK_PRESENT := yes +# Detect whether we actually got the common makefile fragment. If we didn't, +# then it is likely that the user has not yet generated it (via configure). +ifeq ($(strip $(COMMON_MK_INCLUDED)),yes) +COMMON_MK_PRESENT := yes else -CONFIG_MK_PRESENT := no +COMMON_MK_PRESENT := no endif +# Override the DIST_PATH value obtained from config.mk, since it is relative +# to the build directory. +DIST_PATH := .. + # Now we have access to CONFIG_NAME, which tells us which sub-directory of the # config directory to use as our configuration. -CONFIG_PATH := $(ROOT_PATH)/$(CONFIG_DIR)/$(CONFIG_NAME) - - - -# -# --- Include makefile definitions file ---------------------------------------- -# - -# Construct the path to the makefile definitions file residing inside of -# the configuration sub-directory. -MAKE_DEFS_MK_PATH := $(CONFIG_PATH)/$(MAKE_DEFS_FILE) - -# Include the makefile definitions file. 
--include $(MAKE_DEFS_MK_PATH) - -# Detect whether we actually got the make definitios file. If we didn't, then -# it is likely that the configuration is invalid (or incomplete). -ifeq ($(strip $(MAKE_DEFS_MK_INCLUDED)),yes) -MAKE_DEFS_MK_PRESENT := yes -else -MAKE_DEFS_MK_PRESENT := no -endif +CONFIG_PATH := $(RELPATH)/$(CONFIG_DIR)/$(CONFIG_NAME) @@ -127,7 +106,8 @@ BLIS_LIB := $(BLIS_LIB_PATH)/libblis.a # BLAS library path(s). This is where the BLAS libraries reside. HOME_LIB_PATH := $(HOME)/flame/lib -MKL_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/mkl/lib/intel64 +#MKL_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/mkl/lib/intel64 +MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64 ICC_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/intel64 ACML_LIB_PATH := $(HOME_LIB_PATH)/acml/5.3.1/gfortran64_fma4_int64/lib ACMLP_LIB_PATH := $(HOME_LIB_PATH)/acml/5.3.1/gfortran64_fma4_mp_int64/lib @@ -142,9 +122,10 @@ ATLAS_LIB := $(HOME_LIB_PATH)/libf77blas.a \ # MKL MKL_LIB := -L$(MKL_LIB_PATH) \ - -lmkl_sequential \ + -lmkl_intel_lp64 \ -lmkl_core \ - -lmkl_intel_ilp64 + -lmkl_sequential \ + -lpthread -lm -ldl #MKLP_LIB := -L$(MKL_LIB_PATH) \ # -lmkl_intel_thread \ # -lmkl_core \ @@ -152,11 +133,12 @@ MKL_LIB := -L$(MKL_LIB_PATH) \ # -L$(ICC_LIB_PATH) \ # -liomp5 MKLP_LIB := -L$(MKL_LIB_PATH) \ - -lmkl_gnu_thread \ + -lmkl_intel_lp64 \ -lmkl_core \ - -lmkl_intel_ilp64 \ - -L$(ICC_LIB_PATH) \ - -lgomp + -lmkl_gnu_thread \ + -lpthread -lm -ldl + #-L$(ICC_LIB_PATH) \ + #-lgomp # ACML ACML_LIB := -L$(ACML_LIB_PATH) \ @@ -227,9 +209,9 @@ STR_ST := -DTHR_STR=\"st\" STR_MT := -DTHR_STR=\"mt\" # Problem size specification -PDEF_ST := -DP_BEGIN=40 \ +PDEF_ST := -DP_BEGIN=80 \ -DP_END=2000 \ - -DP_INC=40 + -DP_INC=80 PDEF_MT := -DP_BEGIN=80 \ -DP_END=4000 \ diff --git a/test/Makefile b/test/Makefile index bbde792dc..92b8c7df9 100644 --- a/test/Makefile +++ b/test/Makefile @@ -54,15 +54,11 @@ # --- Makefile initialization 
-------------------------------------------------- # -# Define the name of the configuration file. -CONFIG_MK_FILE := config.mk +# Define the name of the common makefile fragment. +COMMON_MK_FILE := common.mk -# Define the name of the file containing build and architecture-specific -# makefile definitions. -MAKE_DEFS_FILE := make_defs.mk - -# Locations of important files. -ROOT_PATH := .. +# Important locations and directory names. +RELPATH := .. CONFIG_DIR := config @@ -73,43 +69,26 @@ CONFIG_DIR := config # Construct the path to the makefile configuration file that was generated by # the configure script. -CONFIG_MK_PATH := $(ROOT_PATH)/$(CONFIG_MK_FILE) +COMMON_MK_PATH := $(RELPATH)/$(COMMON_MK_FILE) -# Include the configuration file. --include $(CONFIG_MK_PATH) +# Include the common makefile fragment. +-include $(COMMON_MK_PATH) -# Detect whether we actually got the configuration file. If we didn't, then -# it is likely that the user has not yet generated it (via configure). -ifeq ($(strip $(CONFIG_MK_INCLUDED)),yes) -CONFIG_MK_PRESENT := yes +# Detect whether we actually got the common makefile fragment. If we didn't, +# then it is likely that the user has not yet generated it (via configure). +ifeq ($(strip $(COMMON_MK_INCLUDED)),yes) +COMMON_MK_PRESENT := yes else -CONFIG_MK_PRESENT := no +COMMON_MK_PRESENT := no endif +# Override the DIST_PATH value obtained from config.mk, since it is relative +# to the build directory. +DIST_PATH := .. + # Now we have access to CONFIG_NAME, which tells us which sub-directory of the # config directory to use as our configuration. -CONFIG_PATH := $(ROOT_PATH)/$(CONFIG_DIR)/$(CONFIG_NAME) - - - -# -# --- Include makefile definitions file ---------------------------------------- -# - -# Construct the path to the makefile definitions file residing inside of -# the configuration sub-directory. -MAKE_DEFS_MK_PATH := $(CONFIG_PATH)/$(MAKE_DEFS_FILE) - -# Include the makefile definitions file. 
--include $(MAKE_DEFS_MK_PATH) - -# Detect whether we actually got the make definitios file. If we didn't, then -# it is likely that the configuration is invalid (or incomplete). -ifeq ($(strip $(MAKE_DEFS_MK_INCLUDED)),yes) -MAKE_DEFS_MK_PRESENT := yes -else -MAKE_DEFS_MK_PRESENT := no -endif +CONFIG_PATH := $(RELPATH)/$(CONFIG_DIR)/$(CONFIG_NAME) @@ -126,7 +105,8 @@ BLIS_LIB := $(BLIS_LIB_PATH)/libblis.a # BLAS library path(s). This is where the BLAS libraries reside. BLAS_LIB_PATH := $(HOME)/flame/lib -MKL_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/mkl/lib/intel64 +#MKL_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/mkl/lib/intel64 +MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64 ESSL_LIB_PATH := $(HOME)/path/to/essl/changeme # OpenBLAS @@ -138,9 +118,10 @@ ATLAS_LIB := $(BLAS_LIB_PATH)/libf77blas.a \ # MKL MKL_LIB := -L$(MKL_LIB_PATH) \ - -lmkl_sequential \ + -lmkl_intel_lp64 \ -lmkl_core \ - -lmkl_intel_lp64 + -lmkl_sequential \ + -lpthread -lm -ldl # ESSL # Note: ESSL is named differently for SMP and/or BG @@ -188,7 +169,7 @@ LDFLAGS += -lgfortran -lm -lpthread -fopenmp # blis openblas atlas mkl mac essl # #all: blis openblas atlas mkl -all: blis openblas +all: blis openblas mkl blis: test_gemv_blis.x \ test_ger_blis.x \ diff --git a/testsuite/Makefile b/testsuite/Makefile index ff421645c..6a1954d8c 100644 --- a/testsuite/Makefile +++ b/testsuite/Makefile @@ -46,7 +46,7 @@ .PHONY: all bin clean \ check-env check-env-mk check-env-fragments check-env-make-defs \ - run run-amd64 run-x86 run-arm + run run-amd64 run-x86 run-arm @@ -54,15 +54,15 @@ # --- Makefile initialization -------------------------------------------------- # -RELPATH := .. -# Define the name of the common makefile. +# Define the name of the common makefile fragment. COMMON_MK_FILE := common.mk # All makefile fragments in the tree will have this name. FRAGMENT_MK := .fragment.mk -# Locations of important files. +# Important locations and directory names. +RELPATH := .. 
CONFIG_DIR := config FRAME_DIR := frame LIB_DIR := lib @@ -77,11 +77,11 @@ LIB_DIR := lib # the configure script. COMMON_MK_PATH := ../$(COMMON_MK_FILE) -# Include the configuration file. +# Include the common makefile fragment. -include $(COMMON_MK_PATH) -# Detect whether we actually got the configuration file. If we didn't, then -# it is likely that the user has not yet generated it (via configure). +# Detect whether we actually got the common makefile fragment. If we didn't, +# then it is likely that the user has not yet generated it (via configure). ifeq ($(strip $(COMMON_MK_INCLUDED)),yes) COMMON_MK_PRESENT := yes else From 97b512ef62c7e25c97ed5e9eca81cd7015b2ac91 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Fri, 6 May 2016 10:24:30 -0500 Subject: [PATCH 13/14] Include headers from cblas.h to pull in f77_int. Details: - Added #include statements for certain key BLIS headers so that the definition of f77_int is pulled in when a user compiles application code with only #include "cblas.h" (and no other BLIS header). This is necessary since f77_int is now used within the cblas API. --- frame/compat/cblas/src/cblas.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/frame/compat/cblas/src/cblas.h b/frame/compat/cblas/src/cblas.h index f8b4d43a4..1ee6209c9 100644 --- a/frame/compat/cblas/src/cblas.h +++ b/frame/compat/cblas/src/cblas.h @@ -2,6 +2,14 @@ #define CBLAS_H #include <stddef.h> +// We need to #include "bli_type_defs.h" in order to pull in the +// definition of f77_int. But in order to #include that header, we +// also need to pull in the headers that precede it in blis.h. 
+#include "bli_system.h" +#include "bli_config.h" +#include "bli_config_macro_defs.h" +#include "bli_type_defs.h" + /* * Enumerated and derived types */ From 4b1e55edbfe0e1cb2e7b9428424903497cb7a841 Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Tue, 10 May 2016 10:08:47 -0500 Subject: [PATCH 14/14] Default-initialize all extern global variables to avoid generating common symbols. Fixes #73. --- frame/1/packv/bli_packv_cntl.c | 2 +- frame/1/scalv/bli_scalv_cntl.c | 2 +- frame/1/unpackv/bli_unpackv_cntl.c | 2 +- frame/1m/packm/bli_packm_cntl.c | 6 +++--- frame/1m/scalm/bli_scalm_cntl.c | 2 +- frame/1m/unpackm/bli_unpackm_cntl.c | 2 +- frame/2/gemv/bli_gemv_cntl.c | 16 ++++++++-------- frame/2/ger/bli_ger_cntl.c | 16 ++++++++-------- frame/2/hemv/bli_hemv_cntl.c | 8 ++++---- frame/2/her/bli_her_cntl.c | 8 ++++---- frame/2/her2/bli_her2_cntl.c | 8 ++++---- frame/2/trmv/bli_trmv_cntl.c | 8 ++++---- frame/2/trsv/bli_trsv_cntl.c | 8 ++++---- frame/3/gemm/bli_gemm_cntl.c | 14 +++++++------- frame/3/trsm/bli_trsm_cntl.c | 26 +++++++++++++------------- frame/base/bli_const.c | 14 +++++++------- frame/base/bli_getopt.c | 4 ++-- frame/base/bli_threading.c | 8 ++++---- 18 files changed, 77 insertions(+), 77 deletions(-) diff --git a/frame/1/packv/bli_packv_cntl.c b/frame/1/packv/bli_packv_cntl.c index cb1404ee9..0d2a5ccab 100644 --- a/frame/1/packv/bli_packv_cntl.c +++ b/frame/1/packv/bli_packv_cntl.c @@ -34,7 +34,7 @@ #include "blis.h" -packv_t* packv_cntl; +packv_t* packv_cntl = NULL; void bli_packv_cntl_init( void ) { diff --git a/frame/1/scalv/bli_scalv_cntl.c b/frame/1/scalv/bli_scalv_cntl.c index e3f259f3e..8470805a7 100644 --- a/frame/1/scalv/bli_scalv_cntl.c +++ b/frame/1/scalv/bli_scalv_cntl.c @@ -34,7 +34,7 @@ #include "blis.h" -scalv_t* scalv_cntl; +scalv_t* scalv_cntl = NULL; void bli_scalv_cntl_init() { diff --git a/frame/1/unpackv/bli_unpackv_cntl.c b/frame/1/unpackv/bli_unpackv_cntl.c index 56d40a69b..cfa0be8ff 100644 --- 
a/frame/1/unpackv/bli_unpackv_cntl.c +++ b/frame/1/unpackv/bli_unpackv_cntl.c @@ -34,7 +34,7 @@ #include "blis.h" -unpackv_t* unpackv_cntl; +unpackv_t* unpackv_cntl = NULL; void bli_unpackv_cntl_init() { diff --git a/frame/1m/packm/bli_packm_cntl.c b/frame/1m/packm/bli_packm_cntl.c index 73c0fbe2c..ae800e99c 100644 --- a/frame/1m/packm/bli_packm_cntl.c +++ b/frame/1m/packm/bli_packm_cntl.c @@ -34,10 +34,10 @@ #include "blis.h" -packm_t* packm_cntl_row; -packm_t* packm_cntl_col; +packm_t* packm_cntl_row = NULL; +packm_t* packm_cntl_col = NULL; -packm_t* packm_cntl; +packm_t* packm_cntl = NULL; void bli_packm_cntl_init() { diff --git a/frame/1m/scalm/bli_scalm_cntl.c b/frame/1m/scalm/bli_scalm_cntl.c index 5f40df27d..1f26635ca 100644 --- a/frame/1m/scalm/bli_scalm_cntl.c +++ b/frame/1m/scalm/bli_scalm_cntl.c @@ -34,7 +34,7 @@ #include "blis.h" -scalm_t* scalm_cntl; +scalm_t* scalm_cntl = NULL; void bli_scalm_cntl_init() { diff --git a/frame/1m/unpackm/bli_unpackm_cntl.c b/frame/1m/unpackm/bli_unpackm_cntl.c index 3f8bb3b55..7d88c1318 100644 --- a/frame/1m/unpackm/bli_unpackm_cntl.c +++ b/frame/1m/unpackm/bli_unpackm_cntl.c @@ -34,7 +34,7 @@ #include "blis.h" -unpackm_t* unpackm_cntl; +unpackm_t* unpackm_cntl = NULL; void bli_unpackm_cntl_init() { diff --git a/frame/2/gemv/bli_gemv_cntl.c b/frame/2/gemv/bli_gemv_cntl.c index eabba1368..05ae42ef4 100644 --- a/frame/2/gemv/bli_gemv_cntl.c +++ b/frame/2/gemv/bli_gemv_cntl.c @@ -39,17 +39,17 @@ extern packm_t* packm_cntl; extern packv_t* packv_cntl; extern unpackv_t* unpackv_cntl; -gemv_t* gemv_cntl_bs_ke_dot; -gemv_t* gemv_cntl_bs_ke_axpy; +gemv_t* gemv_cntl_bs_ke_dot = NULL; +gemv_t* gemv_cntl_bs_ke_axpy = NULL; -gemv_t* gemv_cntl_rp_bs_dot; -gemv_t* gemv_cntl_rp_bs_axpy; +gemv_t* gemv_cntl_rp_bs_dot = NULL; +gemv_t* gemv_cntl_rp_bs_axpy = NULL; -gemv_t* gemv_cntl_cp_bs_dot; -gemv_t* gemv_cntl_cp_bs_axpy; +gemv_t* gemv_cntl_cp_bs_dot = NULL; +gemv_t* gemv_cntl_cp_bs_axpy = NULL; -gemv_t* gemv_cntl_ge_dot; -gemv_t* 
gemv_cntl_ge_axpy; +gemv_t* gemv_cntl_ge_dot = NULL; +gemv_t* gemv_cntl_ge_axpy = NULL; void bli_gemv_cntl_init() diff --git a/frame/2/ger/bli_ger_cntl.c b/frame/2/ger/bli_ger_cntl.c index 5eeebfe24..f3f20e3bb 100644 --- a/frame/2/ger/bli_ger_cntl.c +++ b/frame/2/ger/bli_ger_cntl.c @@ -38,17 +38,17 @@ extern packm_t* packm_cntl; extern packv_t* packv_cntl; extern unpackm_t* unpackm_cntl; -ger_t* ger_cntl_bs_ke_row; -ger_t* ger_cntl_bs_ke_col; +ger_t* ger_cntl_bs_ke_row = NULL; +ger_t* ger_cntl_bs_ke_col = NULL; -ger_t* ger_cntl_rp_bs_row; -ger_t* ger_cntl_rp_bs_col; +ger_t* ger_cntl_rp_bs_row = NULL; +ger_t* ger_cntl_rp_bs_col = NULL; -ger_t* ger_cntl_cp_bs_row; -ger_t* ger_cntl_cp_bs_col; +ger_t* ger_cntl_cp_bs_row = NULL; +ger_t* ger_cntl_cp_bs_col = NULL; -ger_t* ger_cntl_ge_row; -ger_t* ger_cntl_ge_col; +ger_t* ger_cntl_ge_row = NULL; +ger_t* ger_cntl_ge_col = NULL; void bli_ger_cntl_init() diff --git a/frame/2/hemv/bli_hemv_cntl.c b/frame/2/hemv/bli_hemv_cntl.c index a4f8ed263..e245ab689 100644 --- a/frame/2/hemv/bli_hemv_cntl.c +++ b/frame/2/hemv/bli_hemv_cntl.c @@ -44,10 +44,10 @@ extern gemv_t* gemv_cntl_rp_bs_axpy; extern gemv_t* gemv_cntl_cp_bs_dot; extern gemv_t* gemv_cntl_cp_bs_axpy; -hemv_t* hemv_cntl_bs_ke_lrow_ucol; -hemv_t* hemv_cntl_bs_ke_lcol_urow; -hemv_t* hemv_cntl_ge_lrow_ucol; -hemv_t* hemv_cntl_ge_lcol_urow; +hemv_t* hemv_cntl_bs_ke_lrow_ucol = NULL; +hemv_t* hemv_cntl_bs_ke_lcol_urow = NULL; +hemv_t* hemv_cntl_ge_lrow_ucol = NULL; +hemv_t* hemv_cntl_ge_lcol_urow = NULL; void bli_hemv_cntl_init() diff --git a/frame/2/her/bli_her_cntl.c b/frame/2/her/bli_her_cntl.c index c23156b83..6d5d35a2b 100644 --- a/frame/2/her/bli_her_cntl.c +++ b/frame/2/her/bli_her_cntl.c @@ -43,11 +43,11 @@ extern ger_t* ger_cntl_cp_bs_col; extern ger_t* ger_cntl_bs_ke_row; extern ger_t* ger_cntl_bs_ke_col; -her_t* her_cntl_bs_ke_lrow_ucol; -her_t* her_cntl_bs_ke_lcol_urow; +her_t* her_cntl_bs_ke_lrow_ucol = NULL; +her_t* her_cntl_bs_ke_lcol_urow = NULL; -her_t* 
her_cntl_ge_lrow_ucol; -her_t* her_cntl_ge_lcol_urow; +her_t* her_cntl_ge_lrow_ucol = NULL; +her_t* her_cntl_ge_lcol_urow = NULL; void bli_her_cntl_init() diff --git a/frame/2/her2/bli_her2_cntl.c b/frame/2/her2/bli_her2_cntl.c index ce9877b4b..51b909b49 100644 --- a/frame/2/her2/bli_her2_cntl.c +++ b/frame/2/her2/bli_her2_cntl.c @@ -41,11 +41,11 @@ extern unpackm_t* unpackm_cntl; extern ger_t* ger_cntl_rp_bs_row; extern ger_t* ger_cntl_cp_bs_col; -her2_t* her2_cntl_bs_ke_lrow_ucol; -her2_t* her2_cntl_bs_ke_lcol_urow; +her2_t* her2_cntl_bs_ke_lrow_ucol = NULL; +her2_t* her2_cntl_bs_ke_lcol_urow = NULL; -her2_t* her2_cntl_ge_lrow_ucol; -her2_t* her2_cntl_ge_lcol_urow; +her2_t* her2_cntl_ge_lrow_ucol = NULL; +her2_t* her2_cntl_ge_lcol_urow = NULL; void bli_her2_cntl_init() diff --git a/frame/2/trmv/bli_trmv_cntl.c b/frame/2/trmv/bli_trmv_cntl.c index c71ca0a95..59c417291 100644 --- a/frame/2/trmv/bli_trmv_cntl.c +++ b/frame/2/trmv/bli_trmv_cntl.c @@ -43,10 +43,10 @@ extern gemv_t* gemv_cntl_rp_bs_axpy; extern gemv_t* gemv_cntl_cp_bs_dot; extern gemv_t* gemv_cntl_cp_bs_axpy; -trmv_t* trmv_cntl_bs_ke_nrow_tcol; -trmv_t* trmv_cntl_bs_ke_ncol_trow; -trmv_t* trmv_cntl_ge_nrow_tcol; -trmv_t* trmv_cntl_ge_ncol_trow; +trmv_t* trmv_cntl_bs_ke_nrow_tcol = NULL; +trmv_t* trmv_cntl_bs_ke_ncol_trow = NULL; +trmv_t* trmv_cntl_ge_nrow_tcol = NULL; +trmv_t* trmv_cntl_ge_ncol_trow = NULL; void bli_trmv_cntl_init() diff --git a/frame/2/trsv/bli_trsv_cntl.c b/frame/2/trsv/bli_trsv_cntl.c index 9a3b20b1c..a90df2c2e 100644 --- a/frame/2/trsv/bli_trsv_cntl.c +++ b/frame/2/trsv/bli_trsv_cntl.c @@ -44,10 +44,10 @@ extern gemv_t* gemv_cntl_rp_bs_axpy; extern gemv_t* gemv_cntl_cp_bs_dot; extern gemv_t* gemv_cntl_cp_bs_axpy; -trsv_t* trsv_cntl_bs_ke_nrow_tcol; -trsv_t* trsv_cntl_bs_ke_ncol_trow; -trsv_t* trsv_cntl_ge_nrow_tcol; -trsv_t* trsv_cntl_ge_ncol_trow; +trsv_t* trsv_cntl_bs_ke_nrow_tcol = NULL; +trsv_t* trsv_cntl_bs_ke_ncol_trow = NULL; +trsv_t* trsv_cntl_ge_nrow_tcol = NULL; +trsv_t* 
trsv_cntl_ge_ncol_trow = NULL; void bli_trsv_cntl_init() diff --git a/frame/3/gemm/bli_gemm_cntl.c b/frame/3/gemm/bli_gemm_cntl.c index 09b128354..fd00be419 100644 --- a/frame/3/gemm/bli_gemm_cntl.c +++ b/frame/3/gemm/bli_gemm_cntl.c @@ -36,15 +36,15 @@ extern scalm_t* scalm_cntl; -packm_t* gemm_packa_cntl; -packm_t* gemm_packb_cntl; +packm_t* gemm_packa_cntl = NULL; +packm_t* gemm_packb_cntl = NULL; -gemm_t* gemm_cntl_bp_ke; -gemm_t* gemm_cntl_op_bp; -gemm_t* gemm_cntl_mm_op; -gemm_t* gemm_cntl_vl_mm; +gemm_t* gemm_cntl_bp_ke = NULL; +gemm_t* gemm_cntl_op_bp = NULL; +gemm_t* gemm_cntl_mm_op = NULL; +gemm_t* gemm_cntl_vl_mm = NULL; -gemm_t* gemm_cntl; +gemm_t* gemm_cntl = NULL; void bli_gemm_cntl_init() { diff --git a/frame/3/trsm/bli_trsm_cntl.c b/frame/3/trsm/bli_trsm_cntl.c index 765b06889..ea2602eb6 100644 --- a/frame/3/trsm/bli_trsm_cntl.c +++ b/frame/3/trsm/bli_trsm_cntl.c @@ -38,24 +38,24 @@ extern scalm_t* scalm_cntl; extern gemm_t* gemm_cntl_bp_ke; -packm_t* trsm_l_packa_cntl; -packm_t* trsm_l_packb_cntl; +packm_t* trsm_l_packa_cntl = NULL; +packm_t* trsm_l_packb_cntl = NULL; -packm_t* trsm_r_packa_cntl; -packm_t* trsm_r_packb_cntl; +packm_t* trsm_r_packa_cntl = NULL; +packm_t* trsm_r_packb_cntl = NULL; -trsm_t* trsm_cntl_bp_ke; +trsm_t* trsm_cntl_bp_ke = NULL; -trsm_t* trsm_l_cntl_op_bp; -trsm_t* trsm_l_cntl_mm_op; -trsm_t* trsm_l_cntl_vl_mm; +trsm_t* trsm_l_cntl_op_bp = NULL; +trsm_t* trsm_l_cntl_mm_op = NULL; +trsm_t* trsm_l_cntl_vl_mm = NULL; -trsm_t* trsm_r_cntl_op_bp; -trsm_t* trsm_r_cntl_mm_op; -trsm_t* trsm_r_cntl_vl_mm; +trsm_t* trsm_r_cntl_op_bp = NULL; +trsm_t* trsm_r_cntl_mm_op = NULL; +trsm_t* trsm_r_cntl_vl_mm = NULL; -trsm_t* trsm_l_cntl; -trsm_t* trsm_r_cntl; +trsm_t* trsm_l_cntl = NULL; +trsm_t* trsm_r_cntl = NULL; void bli_trsm_cntl_init() diff --git a/frame/base/bli_const.c b/frame/base/bli_const.c index 4c7d9effc..b931a7e8b 100644 --- a/frame/base/bli_const.c +++ b/frame/base/bli_const.c @@ -34,13 +34,13 @@ #include "blis.h" -obj_t 
BLIS_TWO; -obj_t BLIS_ONE; -obj_t BLIS_ONE_HALF; -obj_t BLIS_ZERO; -obj_t BLIS_MINUS_ONE_HALF; -obj_t BLIS_MINUS_ONE; -obj_t BLIS_MINUS_TWO; +obj_t BLIS_TWO = {0}; +obj_t BLIS_ONE = {0}; +obj_t BLIS_ONE_HALF = {0}; +obj_t BLIS_ZERO = {0}; +obj_t BLIS_MINUS_ONE_HALF = {0}; +obj_t BLIS_MINUS_ONE = {0}; +obj_t BLIS_MINUS_TWO = {0}; static bool_t bli_const_is_init = FALSE; diff --git a/frame/base/bli_getopt.c b/frame/base/bli_getopt.c index c56868558..90c93dc76 100644 --- a/frame/base/bli_getopt.c +++ b/frame/base/bli_getopt.c @@ -35,11 +35,11 @@ #include "blis.h" -char *bli_optarg; +char *bli_optarg = NULL; int bli_optind = 1; int bli_opterr = 0; -int bli_optopt; +int bli_optopt = 0; static char OPT_MARKER = '-'; diff --git a/frame/base/bli_threading.c b/frame/base/bli_threading.c index fbb457eec..c0d88cd31 100644 --- a/frame/base/bli_threading.c +++ b/frame/base/bli_threading.c @@ -36,10 +36,10 @@ static bool_t bli_thread_is_init = FALSE; -packm_thrinfo_t BLIS_PACKM_SINGLE_THREADED; -gemm_thrinfo_t BLIS_GEMM_SINGLE_THREADED; -herk_thrinfo_t BLIS_HERK_SINGLE_THREADED; -thread_comm_t BLIS_SINGLE_COMM; +packm_thrinfo_t BLIS_PACKM_SINGLE_THREADED = {0}; +gemm_thrinfo_t BLIS_GEMM_SINGLE_THREADED = {0}; +herk_thrinfo_t BLIS_HERK_SINGLE_THREADED = {0}; +thread_comm_t BLIS_SINGLE_COMM = {0}; void bli_thread_init( void ) {