mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Merge branch 'master' into knl
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -29,6 +29,7 @@
|
||||
# -- build system files --
|
||||
|
||||
config.mk
|
||||
bli_config.h
|
||||
|
||||
# -- makefile fragments --
|
||||
|
||||
|
||||
8
Makefile
8
Makefile
@@ -257,7 +257,7 @@ endif
|
||||
|
||||
# Expand the fragment paths that contain .h files to attain the set of header
|
||||
# files present in all fragment paths.
|
||||
MK_HEADER_FILES := $(foreach frag_path, $(FRAGMENT_DIR_PATHS), \
|
||||
MK_HEADER_FILES := $(foreach frag_path, . $(FRAGMENT_DIR_PATHS), \
|
||||
$(wildcard $(frag_path)/*.h))
|
||||
|
||||
# Strip the leading, internal, and trailing whitespace from our list of header
|
||||
@@ -268,7 +268,7 @@ MK_HEADER_FILES := $(strip $(MK_HEADER_FILES))
|
||||
# expansion. Then, strip the header filename to leave the path to each header
|
||||
# location. Notice this process even weeds out duplicates! Add the config
|
||||
# directory manually since it contains bli_config.h.
|
||||
MK_HEADER_DIR_PATHS := $(dir $(foreach frag_path, $(FRAGMENT_DIR_PATHS), \
|
||||
MK_HEADER_DIR_PATHS := $(dir $(foreach frag_path, . $(FRAGMENT_DIR_PATHS), \
|
||||
$(firstword $(wildcard $(frag_path)/*.h))))
|
||||
|
||||
# Add -I to each header path so we can specify our include search paths to the
|
||||
@@ -678,11 +678,11 @@ endif
|
||||
|
||||
cleantest: check-env
|
||||
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
|
||||
- $(FIND) $(BASE_OBJ_TESTSUITE_PATH) -name "*.o" -name "*.pexe" | $(XARGS) $(RM_F)
|
||||
- $(FIND) $(BASE_OBJ_TESTSUITE_PATH) \( -name "*.o" -o -name "*.pexe" \) | $(XARGS) $(RM_F)
|
||||
- $(RM_RF) $(TESTSUITE_BIN)
|
||||
else
|
||||
@echo "Removing object files from $(BASE_OBJ_TESTSUITE_PATH)."
|
||||
@- $(FIND) $(BASE_OBJ_TESTSUITE_PATH) -name "*.o" -name "*.pexe" | $(XARGS) $(RM_F)
|
||||
@- $(FIND) $(BASE_OBJ_TESTSUITE_PATH) \( -name "*.o" -o -name "*.pexe" \) | $(XARGS) $(RM_F)
|
||||
@echo "Removing $(TESTSUITE_BIN) binary."
|
||||
@- $(RM_RF) $(TESTSUITE_BIN)
|
||||
endif
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
@@ -35,11 +35,48 @@
|
||||
#ifndef BLIS_CONFIG_H
|
||||
#define BLIS_CONFIG_H
|
||||
|
||||
#if @enable_pthreads@
|
||||
#define BLIS_ENABLE_PTHREADS
|
||||
#endif
|
||||
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
#if @enable_openmp@
|
||||
#define BLIS_ENABLE_OPENMP
|
||||
#endif
|
||||
|
||||
#if @int_type_size@ == 64
|
||||
#define BLIS_INT_TYPE_SIZE 64
|
||||
#elif @int_type_size@ == 32
|
||||
#define BLIS_INT_TYPE_SIZE 32
|
||||
#else
|
||||
// determine automatically
|
||||
#endif
|
||||
|
||||
#if @blas2blis_int_type_size@ == 64
|
||||
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 64
|
||||
#elif @blas2blis_int_type_size@ == 32
|
||||
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
|
||||
#else
|
||||
// determine automatically
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ENABLE_BLAS2BLIS
|
||||
#ifndef BLIS_DISABLE_BLAS2BLIS
|
||||
#if @enable_blas2blis@
|
||||
#define BLIS_ENABLE_BLAS2BLIS
|
||||
#else
|
||||
#define BLIS_DISABLE_BLAS2BLIS
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef BLIS_ENABLE_CBLAS
|
||||
#ifndef BLIS_DISABLE_CBLAS
|
||||
#if @enable_cblas@
|
||||
#define BLIS_ENABLE_CBLAS
|
||||
#else
|
||||
#define BLIS_DISABLE_CBLAS
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
15
common.mk
15
common.mk
@@ -156,11 +156,11 @@ ifeq ($(THREADING_MODEL),auto)
|
||||
THREADING_MODEL := omp
|
||||
endif
|
||||
ifeq ($(THREADING_MODEL),omp)
|
||||
CTHREADFLAGS := -fopenmp -DBLIS_ENABLE_OPENMP
|
||||
CTHREADFLAGS := -fopenmp
|
||||
LDFLAGS += -fopenmp
|
||||
endif
|
||||
ifeq ($(THREADING_MODEL),pthreads)
|
||||
CTHREADFLAGS := -pthread -DBLIS_ENABLE_PTHREADS
|
||||
CTHREADFLAGS := -pthread
|
||||
LDFLAGS += -lpthread
|
||||
endif
|
||||
endif
|
||||
@@ -170,11 +170,11 @@ ifeq ($(THREADING_MODEL),auto)
|
||||
THREADING_MODEL := omp
|
||||
endif
|
||||
ifeq ($(THREADING_MODEL),omp)
|
||||
CTHREADFLAGS := -openmp -DBLIS_ENABLE_OPENMP
|
||||
CTHREADFLAGS := -openmp
|
||||
LDFLAGS += -openmp
|
||||
endif
|
||||
ifeq ($(THREADING_MODEL),pthreads)
|
||||
CTHREADFLAGS := -pthread -DBLIS_ENABLE_PTHREADS
|
||||
CTHREADFLAGS := -pthread
|
||||
LDFLAGS += -lpthread
|
||||
endif
|
||||
endif
|
||||
@@ -184,10 +184,11 @@ ifeq ($(THREADING_MODEL),auto)
|
||||
THREADING_MODEL := pthreads
|
||||
endif
|
||||
ifeq ($(THREADING_MODEL),omp)
|
||||
$(error OpenMP is not supported with Clang.)
|
||||
CTHREADFLAGS := -fopenmp
|
||||
LDFLAGS += -fopenmp
|
||||
endif
|
||||
ifeq ($(THREADING_MODEL),pthreads)
|
||||
CTHREADFLAGS := -pthread -DBLIS_ENABLE_PTHREADS
|
||||
CTHREADFLAGS := -pthread
|
||||
LDFLAGS += -lpthread
|
||||
endif
|
||||
endif
|
||||
@@ -207,10 +208,12 @@ CFLAGS_KERNELS := $(CKOPTFLAGS) $(CVECFLAGS) $(CFLAGS_NOOPT)
|
||||
|
||||
ifeq ($(V),1)
|
||||
BLIS_ENABLE_VERBOSE_MAKE_OUTPUT := yes
|
||||
BLIS_ENABLE_TEST_OUTPUT := yes
|
||||
endif
|
||||
|
||||
ifeq ($(V),0)
|
||||
BLIS_ENABLE_VERBOSE_MAKE_OUTPUT := no
|
||||
BLIS_ENABLE_TEST_OUTPUT := no
|
||||
endif
|
||||
|
||||
|
||||
|
||||
@@ -1,43 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_CONFIG_H
|
||||
#define BLIS_CONFIG_H
|
||||
|
||||
|
||||
#define BLIS_INT_TYPE_SIZE 32
|
||||
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
|
||||
|
||||
|
||||
#endif
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_CONFIG_H
|
||||
#define BLIS_CONFIG_H
|
||||
|
||||
|
||||
#define BLIS_INT_TYPE_SIZE 64
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
|
||||
|
||||
|
||||
#endif
|
||||
@@ -38,6 +38,8 @@
|
||||
|
||||
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
|
||||
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
|
||||
// -- Cache blocksizes --
|
||||
|
||||
//
|
||||
|
||||
@@ -1,45 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_CONFIG_H
|
||||
#define BLIS_CONFIG_H
|
||||
|
||||
|
||||
#undef restrict
|
||||
|
||||
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
@@ -36,6 +36,9 @@
|
||||
#define BLIS_KERNEL_H
|
||||
|
||||
|
||||
#undef restrict
|
||||
|
||||
|
||||
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
|
||||
|
||||
// -- Cache blocksizes --
|
||||
|
||||
@@ -1,43 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_CONFIG_H
|
||||
#define BLIS_CONFIG_H
|
||||
|
||||
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
@@ -38,6 +38,8 @@
|
||||
|
||||
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
|
||||
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
|
||||
// -- Cache blocksizes --
|
||||
|
||||
//
|
||||
|
||||
@@ -38,6 +38,8 @@
|
||||
|
||||
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
|
||||
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
|
||||
// -- Cache blocksizes --
|
||||
|
||||
//
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_CONFIG_H
|
||||
#define BLIS_CONFIG_H
|
||||
|
||||
|
||||
#define BLIS_INT_TYPE_SIZE 32
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
|
||||
|
||||
|
||||
#endif
|
||||
@@ -38,6 +38,8 @@
|
||||
|
||||
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
|
||||
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
|
||||
// -- Cache blocksizes --
|
||||
|
||||
//
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_CONFIG_H
|
||||
#define BLIS_CONFIG_H
|
||||
|
||||
|
||||
#define BLIS_INT_TYPE_SIZE 32
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
|
||||
|
||||
|
||||
#endif
|
||||
@@ -38,6 +38,8 @@
|
||||
|
||||
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
|
||||
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
|
||||
// -- Cache blocksizes --
|
||||
|
||||
//
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_CONFIG_H
|
||||
#define BLIS_CONFIG_H
|
||||
|
||||
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
|
||||
|
||||
#endif
|
||||
@@ -38,6 +38,9 @@
|
||||
|
||||
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
|
||||
|
||||
#undef BLIS_SIMD_ALIGN_SIZE
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
|
||||
// -- Cache blocksizes --
|
||||
|
||||
//
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_CONFIG_H
|
||||
#define BLIS_CONFIG_H
|
||||
|
||||
|
||||
#define BLIS_INT_TYPE_SIZE 32
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
|
||||
|
||||
|
||||
#endif
|
||||
@@ -39,6 +39,8 @@
|
||||
|
||||
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
|
||||
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
|
||||
// -- Cache blocksizes --
|
||||
|
||||
//
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_CONFIG_H
|
||||
#define BLIS_CONFIG_H
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
@@ -60,12 +60,23 @@
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
#define BLIS_SGEMM_UKERNEL bli_sgemm_asm_16x6
|
||||
#define BLIS_DEFAULT_MC_S 144
|
||||
#define BLIS_DEFAULT_KC_S 256
|
||||
#define BLIS_DEFAULT_NC_S 4080
|
||||
#define BLIS_DEFAULT_MR_S 16
|
||||
#define BLIS_DEFAULT_NR_S 6
|
||||
*/
|
||||
|
||||
#define BLIS_SGEMM_UKERNEL bli_sgemm_asm_6x16
|
||||
#define BLIS_DEFAULT_MC_S 144
|
||||
#define BLIS_DEFAULT_KC_S 256
|
||||
#define BLIS_DEFAULT_NC_S 4080
|
||||
#define BLIS_DEFAULT_MR_S 6
|
||||
#define BLIS_DEFAULT_NR_S 16
|
||||
|
||||
#define BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS
|
||||
|
||||
#endif
|
||||
|
||||
@@ -80,12 +91,24 @@
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
#define BLIS_DGEMM_UKERNEL bli_dgemm_asm_8x6
|
||||
#define BLIS_DEFAULT_MC_D 72
|
||||
#define BLIS_DEFAULT_KC_D 256
|
||||
#define BLIS_DEFAULT_NC_D 4080
|
||||
#define BLIS_DEFAULT_MR_D 8
|
||||
#define BLIS_DEFAULT_NR_D 6
|
||||
*/
|
||||
|
||||
#define BLIS_DGEMM_UKERNEL bli_dgemm_asm_6x8
|
||||
#define BLIS_DEFAULT_MC_D 72
|
||||
#define BLIS_DEFAULT_KC_D 256
|
||||
#define BLIS_DEFAULT_NC_D 4080
|
||||
#define BLIS_DEFAULT_MR_D 6
|
||||
#define BLIS_DEFAULT_NR_D 8
|
||||
|
||||
#define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_CONFIG_H
|
||||
#define BLIS_CONFIG_H
|
||||
|
||||
|
||||
#define BLIS_INT_TYPE_SIZE 32
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
|
||||
|
||||
|
||||
#endif
|
||||
@@ -38,6 +38,8 @@
|
||||
|
||||
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
|
||||
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
|
||||
// -- Cache blocksizes --
|
||||
|
||||
//
|
||||
|
||||
@@ -36,6 +36,16 @@
|
||||
#define BLIS_KERNEL_H
|
||||
|
||||
|
||||
#define BLIS_TREE_BARRIER
|
||||
#define BLIS_TREE_BARRIER_ARITY 4
|
||||
|
||||
|
||||
#define BLIS_SIMD_ALIGN_SIZE 32
|
||||
|
||||
#define BLIS_SIMD_SIZE 64
|
||||
#define BLIS_SIMD_NUM_REGISTERS 32
|
||||
|
||||
|
||||
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
|
||||
|
||||
// -- Cache blocksizes --
|
||||
|
||||
@@ -1,43 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_CONFIG_H
|
||||
#define BLIS_CONFIG_H
|
||||
|
||||
|
||||
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
|
||||
|
||||
#endif
|
||||
@@ -38,6 +38,8 @@
|
||||
|
||||
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
|
||||
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
|
||||
// -- Cache blocksizes --
|
||||
|
||||
//
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_CONFIG_H
|
||||
#define BLIS_CONFIG_H
|
||||
|
||||
|
||||
#define BLIS_INT_TYPE_SIZE 32
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
|
||||
|
||||
|
||||
#endif
|
||||
@@ -44,6 +44,8 @@
|
||||
|
||||
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
|
||||
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
|
||||
// -- Cache blocksizes --
|
||||
|
||||
//
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_CONFIG_H
|
||||
#define BLIS_CONFIG_H
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
@@ -1,41 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_CONFIG_H
|
||||
#define BLIS_CONFIG_H
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
121
configure
vendored
121
configure
vendored
@@ -91,14 +91,34 @@ print_usage()
|
||||
echo " -t MODEL, --enable-threading[=MODEL], --disable-threading"
|
||||
echo " "
|
||||
echo " Enable threading in the library, using threading model"
|
||||
echo " MODEL={auto,omp,pthreads,no}. If MODEL=no or "
|
||||
echo " MODEL={omp,pthreads,no}. If MODEL=no or "
|
||||
echo " --disable-threading is specified, threading will be"
|
||||
echo " disabled. If MODEL=auto or is unspecified, a model"
|
||||
echo " will be chosen automatically. The default is 'auto'."
|
||||
echo " disabled. The default is 'no'."
|
||||
echo " "
|
||||
echo " -q, --quiet Suppress informational output. By default, configure"
|
||||
echo " is verbose. (NOTE: -q is not yet implemented)"
|
||||
echo " "
|
||||
echo " -i SIZE, --int-size=SIZE"
|
||||
echo " "
|
||||
echo " Set the size (in bits) of internal BLIS integers and"
|
||||
echo " integer types used in native BLIS interfaces."
|
||||
echo " "
|
||||
echo " -b SIZE, --blas-int-size=SIZE"
|
||||
echo " "
|
||||
echo " Set the size (in bits) of integer types in external"
|
||||
echo " BLAS and CBLAS interfaces, if enabled."
|
||||
echo " "
|
||||
echo " --disable-blas, --enable-blas"
|
||||
echo " "
|
||||
echo " Disable (enabled by default) building the BLAS"
|
||||
echo " compatibility layer."
|
||||
echo " "
|
||||
echo " --enable-cblas, --disable-cblas"
|
||||
echo " "
|
||||
echo " Enable (disabled by default) building the CBLAS"
|
||||
echo " compatibility layer. This automatically enables the"
|
||||
echo " BLAS compatibility layer as well."
|
||||
echo " "
|
||||
echo " -h, --help Output this information and quit."
|
||||
echo " "
|
||||
echo " Environment Variables:"
|
||||
@@ -149,6 +169,13 @@ main()
|
||||
config_mk_in_path="${build_dirpath}/${config_mk_in}"
|
||||
config_mk_out_path="${cur_dirpath}/${config_mk_out}"
|
||||
|
||||
# The names/paths for the template bli_config.h.in and its instantiated
|
||||
# counterpart.
|
||||
bli_config_h_in='bli_config.h.in'
|
||||
bli_config_h_out='bli_config.h'
|
||||
bli_config_h_in_path="${build_dirpath}/${bli_config_h_in}"
|
||||
bli_config_h_out_path="${cur_dirpath}/${bli_config_h_out}"
|
||||
|
||||
# Path to 'update-version-file.sh' script.
|
||||
update_version_file_sh="${build_dirpath}/update-version-file.sh"
|
||||
|
||||
@@ -188,7 +215,7 @@ main()
|
||||
debug_flag=''
|
||||
|
||||
# The threading flag.
|
||||
threading_model='auto'
|
||||
threading_model='no'
|
||||
|
||||
# Option variables.
|
||||
quiet_flag=''
|
||||
@@ -197,6 +224,10 @@ main()
|
||||
enable_verbose='no'
|
||||
enable_static='yes'
|
||||
enable_shared='no'
|
||||
int_type_size=0
|
||||
blas2blis_int_type_size=32
|
||||
enable_blas2blis='yes'
|
||||
enable_cblas='no'
|
||||
|
||||
# The path to the auto-detection script.
|
||||
auto_detect_sh="${build_dirpath}/auto-detect/auto-detect.sh"
|
||||
@@ -221,7 +252,7 @@ main()
|
||||
|
||||
|
||||
# Process our command line options.
|
||||
while getopts ":hp:d:t:q-:" opt; do
|
||||
while getopts ":hp:d:t:qi:b:-:" opt; do
|
||||
case $opt in
|
||||
-)
|
||||
case "$OPTARG" in
|
||||
@@ -264,15 +295,30 @@ main()
|
||||
disable-shared)
|
||||
enable_shared='no'
|
||||
;;
|
||||
enable-threading)
|
||||
threading_model='auto'
|
||||
;;
|
||||
enable-threading=*)
|
||||
threading_model=${OPTARG#*=}
|
||||
;;
|
||||
disable-threading)
|
||||
threading_model='no'
|
||||
;;
|
||||
int-size=*)
|
||||
int_type_size=${OPTARG#*=}
|
||||
;;
|
||||
blas-int-size=*)
|
||||
blas2blis_int_type_size=${OPTARG#*=}
|
||||
;;
|
||||
enable-blas)
|
||||
enable_blas2blis='yes'
|
||||
;;
|
||||
disable-blas)
|
||||
enable_blas2blis='no'
|
||||
;;
|
||||
enable-cblas)
|
||||
enable_cblas='yes'
|
||||
;;
|
||||
disable-cblas)
|
||||
enable_cblas='no'
|
||||
;;
|
||||
*)
|
||||
print_usage
|
||||
;;
|
||||
@@ -294,6 +340,12 @@ main()
|
||||
t)
|
||||
threading_model=$OPTARG
|
||||
;;
|
||||
i)
|
||||
int_type_size=$OPTARG
|
||||
;;
|
||||
b)
|
||||
blas2blis_int_type_size=$OPTARG
|
||||
;;
|
||||
\?)
|
||||
print_usage
|
||||
;;
|
||||
@@ -430,18 +482,58 @@ main()
|
||||
|
||||
|
||||
# Check the threading model flag.
|
||||
# Translate the requested threading model into two 0/1 flags. Both flags
# start out disabled; only 'omp' and 'pthreads' switch one of them on.
# Any value other than auto/omp/pthreads/no aborts configure.
enable_openmp=0
enable_pthreads=0
case "${threading_model}" in
    auto)
        echo "${script_name}: determining the threading model automatically."
        ;;
    omp)
        echo "${script_name}: using OpenMP for threading."
        enable_openmp=1
        ;;
    pthreads)
        echo "${script_name}: using Pthreads for threading."
        enable_pthreads=1
        ;;
    no)
        echo "${script_name}: threading is disabled."
        ;;
    *)
        echo "Unsupported threading model: ${threading_model}."
        exit 1
        ;;
esac
|
||||
|
||||
|
||||
# Convert 'yes' and 'no' flags to booleans.
|
||||
# Convert the 'yes'/'no' CBLAS option into the 1/0 value that is written
# into the generated configuration header.
if [ "x${enable_cblas}" = "xyes" ]; then
    echo "${script_name}: the CBLAS compatibility layer is enabled."
    enable_cblas=1
    # The usage text promises that enabling CBLAS also enables the BLAS
    # compatibility layer. The flag consulted by the BLAS check that follows
    # is enable_blas2blis, so that is the variable that must be forced to
    # 'yes' here; otherwise "--enable-cblas --disable-blas" would leave the
    # BLAS layer switched off.
    enable_blas2blis='yes'
else
    echo "${script_name}: the CBLAS compatibility layer is disabled."
    enable_cblas=0
fi
|
||||
# Convert the 'yes'/'no' BLAS option into 1/0. Anything other than the exact
# string 'yes' is treated as disabled.
case "${enable_blas2blis}" in
    yes)
        echo "${script_name}: the BLAS compatibility layer is enabled."
        enable_blas2blis=1
        ;;
    *)
        echo "${script_name}: the BLAS compatibility layer is disabled."
        enable_blas2blis=0
        ;;
esac
|
||||
|
||||
|
||||
# Report integer sizes
|
||||
# Announce the integer width used internally by BLIS. Only the exact values
# 32 and 64 are reported as fixed widths; every other value is reported as
# "automatically determined".
case "${int_type_size}" in
    32|64)
        echo "${script_name}: the internal integer size is ${int_type_size}-bit."
        ;;
    *)
        echo "${script_name}: the internal integer size is automatically determined."
        ;;
esac

# Same report for the integer width of the BLAS/CBLAS interfaces.
case "${blas2blis_int_type_size}" in
    32|64)
        echo "${script_name}: the BLAS/CBLAS interface integer size is ${blas2blis_int_type_size}-bit."
        ;;
    *)
        echo "${script_name}: the BLAS/CBLAS interface integer size is automatically determined."
        ;;
esac
|
||||
|
||||
|
||||
# Insert escape characters into the paths used in the sed command below.
|
||||
@@ -466,6 +558,19 @@ main()
|
||||
| sed "s/@enable_dynamic@/${enable_shared}/g" \
|
||||
| sed "s/@threading_model@/${threading_model}/g" \
|
||||
> "${config_mk_out_path}"
|
||||
|
||||
|
||||
# Begin substituting information into the bli_config_h_in file, outputting
|
||||
# to bli_config_h_out.
|
||||
echo "${script_name}: creating ${bli_config_h_out_path} from ${bli_config_h_in_path}"
|
||||
# Instantiate the header template: each stage replaces one @placeholder@
# with the value computed above, and the result is written to the output
# header. A trailing '|' continues the pipeline onto the next line.
cat "${bli_config_h_in_path}" |
    sed "s/@enable_openmp@/${enable_openmp}/g" |
    sed "s/@enable_pthreads@/${enable_pthreads}/g" |
    sed "s/@int_type_size@/${int_type_size}/g" |
    sed "s/@blas2blis_int_type_size@/${blas2blis_int_type_size}/g" |
    sed "s/@enable_blas2blis@/${enable_blas2blis}/g" |
    sed "s/@enable_cblas@/${enable_cblas}/g" \
    > "${bli_config_h_out_path}"
|
||||
|
||||
|
||||
# Create obj sub-directories (if they do not already exist).
|
||||
|
||||
@@ -56,6 +56,23 @@ GENFRONT( subv )
|
||||
GENFRONT( swapv )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,_check) \
|
||||
( \
|
||||
obj_t* alpha, \
|
||||
obj_t* x, \
|
||||
obj_t* beta, \
|
||||
obj_t* y \
|
||||
) \
|
||||
{ \
|
||||
bli_l1v_axby_check( alpha, x, beta, y ); \
|
||||
}
|
||||
|
||||
GENFRONT( axpbyv )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
@@ -137,6 +154,22 @@ GENFRONT( scalv )
|
||||
GENFRONT( setv )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,_check) \
|
||||
( \
|
||||
obj_t* x, \
|
||||
obj_t* beta, \
|
||||
obj_t* y \
|
||||
) \
|
||||
{ \
|
||||
bli_l1v_xby_check( x, beta, y ); \
|
||||
}
|
||||
|
||||
GENFRONT( xpbyv )
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_l1v_xy_check
|
||||
@@ -221,6 +254,108 @@ void bli_l1v_axy_check
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
void bli_l1v_xby_check
|
||||
(
|
||||
obj_t* x,
|
||||
obj_t* beta,
|
||||
obj_t* y
|
||||
)
|
||||
{
|
||||
err_t e_val;
|
||||
|
||||
// Check object datatypes.
|
||||
|
||||
e_val = bli_check_noninteger_object( beta );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_floating_object( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_floating_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_scalar_object( beta );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_vector_object( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_vector_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_equal_vector_lengths( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object buffers (for non-NULLness).
|
||||
|
||||
e_val = bli_check_object_buffer( beta );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_buffer( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_buffer( y );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
void bli_l1v_axby_check
|
||||
(
|
||||
obj_t* alpha,
|
||||
obj_t* x,
|
||||
obj_t* beta,
|
||||
obj_t* y
|
||||
)
|
||||
{
|
||||
err_t e_val;
|
||||
|
||||
// Check object datatypes.
|
||||
|
||||
e_val = bli_check_noninteger_object( alpha );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_noninteger_object( beta );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_floating_object( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_floating_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_scalar_object( alpha );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_scalar_object( beta );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_vector_object( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_vector_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_equal_vector_lengths( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object buffers (for non-NULLness).
|
||||
|
||||
e_val = bli_check_object_buffer( alpha );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_buffer( beta );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_buffer( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_buffer( y );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
void bli_l1v_dot_check
|
||||
(
|
||||
obj_t* alpha,
|
||||
|
||||
@@ -52,6 +52,20 @@ GENTPROT( subv )
|
||||
GENTPROT( swapv )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,_check) \
|
||||
( \
|
||||
obj_t* alpha, \
|
||||
obj_t* x, \
|
||||
obj_t* beta, \
|
||||
obj_t* y \
|
||||
);
|
||||
|
||||
GENTPROT( axpbyv )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( opname ) \
|
||||
\
|
||||
@@ -118,6 +132,20 @@ GENTPROT( scalv )
|
||||
GENTPROT( setv )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,_check) \
|
||||
( \
|
||||
obj_t* x, \
|
||||
obj_t* beta, \
|
||||
obj_t* y \
|
||||
);
|
||||
|
||||
GENTPROT( xpbyv )
|
||||
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_l1v_xy_check
|
||||
@@ -133,6 +161,21 @@ void bli_l1v_axy_check
|
||||
obj_t* y
|
||||
);
|
||||
|
||||
void bli_l1v_xby_check
|
||||
(
|
||||
obj_t* x,
|
||||
obj_t* beta,
|
||||
obj_t* y
|
||||
);
|
||||
|
||||
void bli_l1v_axby_check
|
||||
(
|
||||
obj_t* alpha,
|
||||
obj_t* x,
|
||||
obj_t* beta,
|
||||
obj_t* y
|
||||
);
|
||||
|
||||
void bli_l1v_dot_check
|
||||
(
|
||||
obj_t* alpha,
|
||||
|
||||
@@ -64,6 +64,31 @@ GENFRONT( subv, BLIS_SUBV_KER )
|
||||
GENFRONT( swapv, BLIS_SWAPV_KER )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname, kertype, dep1, dep2, dep3, dep4 ) \
|
||||
\
|
||||
void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \
|
||||
{ \
|
||||
bli_cntx_obj_create( cntx ); \
|
||||
\
|
||||
/* Initialize the context with kernel dependencies. */ \
|
||||
PASTEMAC(dep1,_cntx_init)( cntx ); \
|
||||
PASTEMAC(dep2,_cntx_init)( cntx ); \
|
||||
PASTEMAC(dep3,_cntx_init)( cntx ); \
|
||||
PASTEMAC(dep4,_cntx_init)( cntx ); \
|
||||
\
|
||||
/* Initialize the context with the kernel associated with the current
|
||||
operation. */ \
|
||||
bli_gks_cntx_set_l1v_ker( kertype, cntx ); \
|
||||
} \
|
||||
void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \
|
||||
{ \
|
||||
bli_cntx_obj_free( cntx ); \
|
||||
}
|
||||
|
||||
GENFRONT( axpbyv, BLIS_AXPBYV_KER, axpyv, xpbyv, scal2v, scalv )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname, kertype, depname ) \
|
||||
\
|
||||
@@ -84,6 +109,29 @@ void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \
|
||||
}
|
||||
|
||||
GENFRONT( axpyv, BLIS_AXPYV_KER, addv )
|
||||
GENFRONT( scal2v, BLIS_SCAL2V_KER, setv )
|
||||
GENFRONT( scalv, BLIS_SCALV_KER, setv )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname, kertype, dep1, dep2 ) \
|
||||
\
|
||||
void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \
|
||||
{ \
|
||||
bli_cntx_obj_create( cntx ); \
|
||||
\
|
||||
/* Initialize the context with kernel dependencies. */ \
|
||||
PASTEMAC(dep1,_cntx_init)( cntx ); \
|
||||
PASTEMAC(dep2,_cntx_init)( cntx ); \
|
||||
\
|
||||
/* Initialize the context with the kernel associated with the current
|
||||
operation. */ \
|
||||
bli_gks_cntx_set_l1v_ker( kertype, cntx ); \
|
||||
} \
|
||||
void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \
|
||||
{ \
|
||||
bli_cntx_obj_free( cntx ); \
|
||||
}
|
||||
|
||||
GENFRONT( scal2v, BLIS_SCAL2V_KER, setv, copyv )
|
||||
GENFRONT( xpbyv, BLIS_XPBYV_KER, addv, copyv )
|
||||
|
||||
|
||||
@@ -44,6 +44,7 @@ void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ); \
|
||||
void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx );
|
||||
|
||||
GENPROT( addv )
|
||||
GENPROT( axpbyv )
|
||||
GENPROT( axpyv )
|
||||
GENPROT( copyv )
|
||||
GENPROT( dotv )
|
||||
@@ -54,4 +55,5 @@ GENPROT( scal2v )
|
||||
GENPROT( setv )
|
||||
GENPROT( subv )
|
||||
GENPROT( swapv )
|
||||
GENPROT( xpbyv )
|
||||
|
||||
|
||||
@@ -49,8 +49,8 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
@@ -58,6 +58,24 @@ INSERT_GENTDEF( addv )
|
||||
INSERT_GENTDEF( copyv )
|
||||
INSERT_GENTDEF( subv )
|
||||
|
||||
// axpbyv
|
||||
|
||||
#undef GENTDEF
|
||||
#define GENTDEF( ctype, ch, opname, tsuf ) \
|
||||
\
|
||||
typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
INSERT_GENTDEF( axpbyv )
|
||||
|
||||
// axpyv, scal2v
|
||||
|
||||
#undef GENTDEF
|
||||
@@ -67,9 +85,9 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
@@ -86,9 +104,9 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* rho, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
ctype* restrict rho, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
@@ -104,11 +122,11 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* beta, \
|
||||
ctype* rho, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict rho, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
@@ -122,7 +140,7 @@ INSERT_GENTDEF( dotxv )
|
||||
typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
( \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
@@ -137,8 +155,8 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
( \
|
||||
conj_t conjalpha, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
@@ -153,14 +171,29 @@ INSERT_GENTDEF( setv )
|
||||
typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
( \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
INSERT_GENTDEF( swapv )
|
||||
|
||||
// xpbyv
|
||||
|
||||
#undef GENTDEF
|
||||
#define GENTDEF( ctype, ch, opname, tsuf ) \
|
||||
\
|
||||
typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
INSERT_GENTDEF( xpbyv )
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -42,11 +42,11 @@
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( addv_ker_name )
|
||||
@@ -59,12 +59,29 @@ INSERT_GENTPROT_BASIC( subv_ker_name )
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
); \
|
||||
|
||||
INSERT_GENTPROT_BASIC( axpbyv_ker_name )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
); \
|
||||
|
||||
INSERT_GENTPROT_BASIC( axpyv_ker_name )
|
||||
@@ -76,13 +93,13 @@ INSERT_GENTPROT_BASIC( scal2v_ker_name )
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* rho, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
ctype* restrict rho, \
|
||||
cntx_t* cntx \
|
||||
); \
|
||||
|
||||
INSERT_GENTPROT_BASIC( dotv_ker_name )
|
||||
@@ -93,15 +110,15 @@ INSERT_GENTPROT_BASIC( dotv_ker_name )
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* beta, \
|
||||
ctype* rho, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict rho, \
|
||||
cntx_t* cntx \
|
||||
); \
|
||||
|
||||
INSERT_GENTPROT_BASIC( dotxv_ker_name )
|
||||
@@ -112,9 +129,9 @@ INSERT_GENTPROT_BASIC( dotxv_ker_name )
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
cntx_t* cntx \
|
||||
dim_t n, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
cntx_t* cntx \
|
||||
); \
|
||||
|
||||
INSERT_GENTPROT_BASIC( invertv_ker_name )
|
||||
@@ -125,11 +142,11 @@ INSERT_GENTPROT_BASIC( invertv_ker_name )
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjalpha, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjalpha, \
|
||||
dim_t n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
cntx_t* cntx \
|
||||
); \
|
||||
|
||||
INSERT_GENTPROT_BASIC( scalv_ker_name )
|
||||
@@ -141,11 +158,27 @@ INSERT_GENTPROT_BASIC( setv_ker_name )
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
dim_t n, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
); \
|
||||
|
||||
INSERT_GENTPROT_BASIC( swapv_ker_name )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
); \
|
||||
|
||||
INSERT_GENTPROT_BASIC( xpbyv_ker_name )
|
||||
|
||||
|
||||
@@ -82,6 +82,64 @@ GENFRONT( copyv )
|
||||
GENFRONT( subv )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,EX_SUF) \
|
||||
( \
|
||||
obj_t* alpha, \
|
||||
obj_t* x, \
|
||||
obj_t* beta, \
|
||||
obj_t* y \
|
||||
BLIS_OAPI_CNTX_PARAM \
|
||||
) \
|
||||
{ \
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_datatype( *x ); \
|
||||
\
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
dim_t n = bli_obj_vector_dim( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( *y ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
void* buf_beta; \
|
||||
\
|
||||
obj_t alpha_local; \
|
||||
obj_t beta_local; \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( alpha, x, beta, y ); \
|
||||
\
|
||||
/* Create local copy-casts of scalars (and apply internal conjugation
|
||||
as needed). */ \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
alpha, &alpha_local ); \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
beta, &beta_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
|
||||
\
|
||||
/* Invoke the void pointer-based function. */ \
|
||||
bli_call_ft_9 \
|
||||
( \
|
||||
dt, \
|
||||
opname, \
|
||||
conjx, \
|
||||
n, \
|
||||
buf_alpha, \
|
||||
buf_x, inc_x, \
|
||||
buf_beta, \
|
||||
buf_y, inc_y, \
|
||||
cntx \
|
||||
); \
|
||||
}
|
||||
|
||||
GENFRONT( axpbyv )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
@@ -366,5 +424,57 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
GENFRONT( swapv )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,EX_SUF) \
|
||||
( \
|
||||
obj_t* x, \
|
||||
obj_t* beta, \
|
||||
obj_t* y \
|
||||
BLIS_OAPI_CNTX_PARAM \
|
||||
) \
|
||||
{ \
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_datatype( *x ); \
|
||||
\
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
dim_t n = bli_obj_vector_dim( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( *y ); \
|
||||
\
|
||||
void* buf_beta; \
|
||||
\
|
||||
obj_t beta_local; \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( x, beta, y ); \
|
||||
\
|
||||
/* Create local copy-casts of scalars (and apply internal conjugation
|
||||
as needed). */ \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
beta, &beta_local ); \
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
|
||||
\
|
||||
/* Invoke the void pointer-based function. */ \
|
||||
bli_call_ft_8 \
|
||||
( \
|
||||
dt, \
|
||||
opname, \
|
||||
conjx, \
|
||||
n, \
|
||||
buf_x, inc_x, \
|
||||
buf_beta, \
|
||||
buf_y, inc_y, \
|
||||
cntx \
|
||||
); \
|
||||
}
|
||||
|
||||
GENFRONT( xpbyv )
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -52,6 +52,21 @@ GENTPROT( copyv )
|
||||
GENTPROT( subv )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,EX_SUF) \
|
||||
( \
|
||||
obj_t* alpha, \
|
||||
obj_t* x, \
|
||||
obj_t* beta, \
|
||||
obj_t* y \
|
||||
BLIS_OAPI_CNTX_PARAM \
|
||||
);
|
||||
|
||||
GENTPROT( axpbyv )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( opname ) \
|
||||
\
|
||||
@@ -135,3 +150,17 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
|
||||
GENTPROT( swapv )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,EX_SUF) \
|
||||
( \
|
||||
obj_t* x, \
|
||||
obj_t* beta, \
|
||||
obj_t* y \
|
||||
BLIS_OAPI_CNTX_PARAM \
|
||||
);
|
||||
|
||||
GENTPROT( xpbyv )
|
||||
|
||||
|
||||
@@ -74,6 +74,44 @@ INSERT_GENTFUNC_BASIC( copyv, BLIS_COPYV_KER )
|
||||
INSERT_GENTFUNC_BASIC( subv, BLIS_SUBV_KER )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname, kerid ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* beta, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
cntx_t* cntx_p; \
|
||||
\
|
||||
bli_cntx_init_local_if( opname, cntx, cntx_p ); \
|
||||
\
|
||||
PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
conjx, \
|
||||
n, \
|
||||
alpha, \
|
||||
x, incx, \
|
||||
beta, \
|
||||
y, incy, \
|
||||
cntx_p \
|
||||
); \
|
||||
\
|
||||
bli_cntx_finalize_local_if( opname, cntx ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( axpbyv, BLIS_AXPBYV_KER )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname, kerid ) \
|
||||
\
|
||||
@@ -287,3 +325,39 @@ void PASTEMAC(ch,opname) \
|
||||
|
||||
INSERT_GENTFUNC_BASIC( swapv, BLIS_SWAPV_KER )
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname, kerid ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* beta, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
cntx_t* cntx_p; \
|
||||
\
|
||||
bli_cntx_init_local_if( opname, cntx, cntx_p ); \
|
||||
\
|
||||
PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
conjx, \
|
||||
n, \
|
||||
x, incx, \
|
||||
beta, \
|
||||
y, incy, \
|
||||
cntx_p \
|
||||
); \
|
||||
\
|
||||
bli_cntx_finalize_local_if( opname, cntx ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( xpbyv, BLIS_XPBYV_KER )
|
||||
|
||||
|
||||
|
||||
@@ -40,6 +40,9 @@
|
||||
#undef addv_ker_name
|
||||
#define addv_ker_name addv
|
||||
|
||||
#undef axpbyv_ker_name
|
||||
#define axpbyv_ker_name axpbyv
|
||||
|
||||
#undef axpyv_ker_name
|
||||
#define axpyv_ker_name axpyv
|
||||
|
||||
@@ -70,6 +73,9 @@
|
||||
#undef swapv_ker_name
|
||||
#define swapv_ker_name swapv
|
||||
|
||||
#undef xpbyv_ker_name
|
||||
#define xpbyv_ker_name xpbyv
|
||||
|
||||
|
||||
// Include the level-1v kernel API template.
|
||||
|
||||
|
||||
@@ -39,15 +39,15 @@
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* chi1; \
|
||||
ctype* psi1; \
|
||||
ctype* restrict chi1; \
|
||||
ctype* restrict psi1; \
|
||||
dim_t i; \
|
||||
\
|
||||
if ( bli_zero_dim1( n ) ) return; \
|
||||
@@ -57,22 +57,42 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
if ( bli_is_conj( conjx ) ) \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if (incx == 1 && incy == 1) \
|
||||
{ \
|
||||
PASTEMAC(ch,addjs)( *chi1, *psi1 ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,addjs)( chi1[i], psi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,addjs)( *chi1, *psi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if (incx == 1 && incy == 1) \
|
||||
{ \
|
||||
PASTEMAC(ch,adds)( *chi1, *psi1 ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,adds)( chi1[i], psi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,adds)( *chi1, *psi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
244
frame/1/kernels/bli_axpbyv_ref.c
Normal file
244
frame/1/kernels/bli_axpbyv_ref.c
Normal file
@@ -0,0 +1,244 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict chi1; \
|
||||
ctype* restrict psi1; \
|
||||
dim_t i; \
|
||||
\
|
||||
if ( bli_zero_dim1( n ) ) return; \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq0)( *alpha ) ) \
|
||||
{ \
|
||||
/* If alpha is zero and beta is zero, set to zero. */ \
|
||||
if ( PASTEMAC(ch,eq0)( *beta ) ) \
|
||||
{ \
|
||||
ctype* zero = PASTEMAC(ch,0); \
|
||||
\
|
||||
/* Query the context for the kernel function pointer. */ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
PASTECH(ch,setv_ft) setv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SETV_KER, cntx ); \
|
||||
\
|
||||
setv_p \
|
||||
( \
|
||||
BLIS_NO_CONJUGATE, \
|
||||
n, \
|
||||
zero, \
|
||||
y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
return; \
|
||||
} \
|
||||
/* If alpha is zero and beta is one, return. */ \
|
||||
else if ( PASTEMAC(ch,eq1)( *beta ) ) \
|
||||
{ \
|
||||
return; \
|
||||
} \
|
||||
/* If alpha is zero, scale by beta. */ \
|
||||
else \
|
||||
{ \
|
||||
/* Query the context for the kernel function pointer. */ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
PASTECH(ch,scalv_ft) scalv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCALV_KER, cntx ); \
|
||||
\
|
||||
scalv_p \
|
||||
( \
|
||||
BLIS_NO_CONJUGATE, \
|
||||
n, \
|
||||
beta, \
|
||||
y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
} \
|
||||
else if ( PASTEMAC(ch,eq1)( *alpha ) ) \
|
||||
{ \
|
||||
/* If alpha is one and beta is zero, copy. */ \
|
||||
if ( PASTEMAC(ch,eq0)( *beta ) ) \
|
||||
{ \
|
||||
/* Query the context for the kernel function pointer. */ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \
|
||||
\
|
||||
copyv_p \
|
||||
( \
|
||||
conjx, \
|
||||
n, \
|
||||
x, incx, \
|
||||
y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
return; \
|
||||
} \
|
||||
/* If alpha is one and beta is one, add. */ \
|
||||
else if ( PASTEMAC(ch,eq1)( *beta ) ) \
|
||||
{ \
|
||||
/* Query the context for the kernel function pointer. */ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
PASTECH(ch,addv_ft) addv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_ADDV_KER, cntx ); \
|
||||
\
|
||||
addv_p \
|
||||
( \
|
||||
conjx, \
|
||||
n, \
|
||||
x, incx, \
|
||||
y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
return; \
|
||||
} \
|
||||
/* If alpha is one, call xpby. */ \
|
||||
else \
|
||||
{ \
|
||||
/* Query the context for the kernel function pointer. */ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
PASTECH(ch,xpbyv_ft) xpbyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_XPBYV_KER, cntx ); \
|
||||
\
|
||||
xpbyv_p \
|
||||
( \
|
||||
conjx, \
|
||||
n, \
|
||||
x, incx, \
|
||||
beta, \
|
||||
y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
return; \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
/* If beta is zero, call scal2. */ \
|
||||
if ( PASTEMAC(ch,eq0)( *beta ) ) \
|
||||
{ \
|
||||
/* Query the context for the kernel function pointer. */ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
PASTECH(ch,scal2v_ft) scal2v_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCAL2V_KER, cntx ); \
|
||||
\
|
||||
scal2v_p \
|
||||
( \
|
||||
conjx, \
|
||||
n, \
|
||||
alpha, \
|
||||
x, incx, \
|
||||
y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
return; \
|
||||
} \
|
||||
/* If beta is one, call axpy. */ \
|
||||
else if ( PASTEMAC(ch,eq1)( *beta ) ) \
|
||||
{ \
|
||||
/* Query the context for the kernel function pointer. */ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
PASTECH(ch,axpyv_ft) axpyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \
|
||||
\
|
||||
axpyv_p \
|
||||
( \
|
||||
conjx, \
|
||||
n, \
|
||||
alpha, \
|
||||
x, incx, \
|
||||
y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
} \
|
||||
\
|
||||
chi1 = x; \
|
||||
psi1 = y; \
|
||||
\
|
||||
if ( bli_is_conj( conjx ) ) \
|
||||
{ \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,axpbyjs)( *alpha, chi1[i], *beta, psi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,axpbyjs)( *alpha, *chi1, *beta, *psi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,axpbys)( *alpha, chi1[i], *beta, psi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,axpbys)( *alpha, *chi1, *beta, *psi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0( axpbyv_ref )
|
||||
|
||||
@@ -39,16 +39,16 @@
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* chi1; \
|
||||
ctype* psi1; \
|
||||
ctype* restrict chi1; \
|
||||
ctype* restrict psi1; \
|
||||
dim_t i; \
|
||||
\
|
||||
if ( bli_zero_dim1( n ) ) return; \
|
||||
@@ -79,22 +79,42 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
if ( bli_is_conj( conjx ) ) \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
PASTEMAC(ch,axpyjs)( *alpha, *chi1, *psi1 ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,axpyjs)( *alpha, chi1[i], psi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,axpyjs)( *alpha, *chi1, *psi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
PASTEMAC(ch,axpys)( *alpha, *chi1, *psi1 ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,axpys)( *alpha, chi1[i], psi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,axpys)( *alpha, *chi1, *psi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
@@ -39,15 +39,15 @@
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* chi1; \
|
||||
ctype* psi1; \
|
||||
ctype* restrict chi1; \
|
||||
ctype* restrict psi1; \
|
||||
dim_t i; \
|
||||
\
|
||||
if ( bli_zero_dim1( n ) ) return; \
|
||||
@@ -57,22 +57,42 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
if ( bli_is_conj( conjx ) ) \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
PASTEMAC(ch,copyjs)( *chi1, *psi1 ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,copyjs)( chi1[i], psi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,copyjs)( *chi1, *psi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
PASTEMAC(ch,copys)( *chi1, *psi1 ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,copys)( chi1[i], psi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,copys)( *chi1, *psi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
@@ -39,17 +39,17 @@
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* rho, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
ctype* restrict rho, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* chi1; \
|
||||
ctype* psi1; \
|
||||
ctype* restrict chi1; \
|
||||
ctype* restrict psi1; \
|
||||
ctype dotxy; \
|
||||
dim_t i; \
|
||||
conj_t conjx_use; \
|
||||
@@ -75,22 +75,42 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
if ( bli_is_conj( conjx_use ) ) \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,dotjs)( chi1[i], psi1[i], dotxy ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,dots)( chi1[i], psi1[i], dotxy ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
|
||||
@@ -39,19 +39,19 @@
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* beta, \
|
||||
ctype* rho, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict rho, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* chi1; \
|
||||
ctype* psi1; \
|
||||
ctype* restrict chi1; \
|
||||
ctype* restrict psi1; \
|
||||
ctype dotxy; \
|
||||
dim_t i; \
|
||||
conj_t conjx_use; \
|
||||
@@ -83,22 +83,42 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
if ( bli_is_conj( conjx_use ) ) \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,dotjs)( chi1[i], psi1[i], dotxy ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,dots)( chi1[i], psi1[i], dotxy ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
|
||||
@@ -39,23 +39,33 @@
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
cntx_t* cntx \
|
||||
dim_t n, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* chi1; \
|
||||
ctype* restrict chi1; \
|
||||
dim_t i; \
|
||||
\
|
||||
if ( bli_zero_dim1( n ) ) return; \
|
||||
\
|
||||
chi1 = x; \
|
||||
\
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if ( incx == 1 ) \
|
||||
{ \
|
||||
PASTEMAC(ch,inverts)( *chi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,inverts)( chi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,inverts)( *chi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
@@ -32,116 +32,48 @@
|
||||
|
||||
*/
|
||||
|
||||
// Redefine level-1v kernel API names to induce prototypes.
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
#undef addv_ker_name
|
||||
#define addv_ker_name addv_ref
|
||||
|
||||
INSERT_GENTPROT_BASIC( addv_ref )
|
||||
INSERT_GENTPROT_BASIC( copyv_ref )
|
||||
INSERT_GENTPROT_BASIC( subv_ref )
|
||||
#undef axpbyv_ker_name
|
||||
#define axpbyv_ker_name axpbyv_ref
|
||||
|
||||
#undef axpyv_ker_name
|
||||
#define axpyv_ker_name axpyv_ref
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
#undef copyv_ker_name
|
||||
#define copyv_ker_name copyv_ref
|
||||
|
||||
INSERT_GENTPROT_BASIC( axpyv_ref )
|
||||
INSERT_GENTPROT_BASIC( scal2v_ref )
|
||||
#undef dotv_ker_name
|
||||
#define dotv_ker_name dotv_ref
|
||||
|
||||
#undef dotxv_ker_name
|
||||
#define dotxv_ker_name dotxv_ref
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* rho, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
#undef invertv_ker_name
|
||||
#define invertv_ker_name invertv_ref
|
||||
|
||||
INSERT_GENTPROT_BASIC( dotv_ref )
|
||||
#undef scalv_ker_name
|
||||
#define scalv_ker_name scalv_ref
|
||||
|
||||
#undef scal2v_ker_name
|
||||
#define scal2v_ker_name scal2v_ref
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* beta, \
|
||||
ctype* rho, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
#undef setv_ker_name
|
||||
#define setv_ker_name setv_ref
|
||||
|
||||
INSERT_GENTPROT_BASIC( dotxv_ref )
|
||||
#undef subv_ker_name
|
||||
#define subv_ker_name subv_ref
|
||||
|
||||
#undef swapv_ker_name
|
||||
#define swapv_ker_name swapv_ref
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
#undef xpbyv_ker_name
|
||||
#define xpbyv_ker_name xpbyv_ref
|
||||
|
||||
INSERT_GENTPROT_BASIC( invertv_ref )
|
||||
// Include the level-1v kernel API template.
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjalpha, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( scalv_ref )
|
||||
INSERT_GENTPROT_BASIC( setv_ref )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( swapv_ref )
|
||||
#include "bli_l1v_ker.h"
|
||||
|
||||
|
||||
@@ -39,16 +39,16 @@
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* chi1; \
|
||||
ctype* psi1; \
|
||||
ctype* restrict chi1; \
|
||||
ctype* restrict psi1; \
|
||||
dim_t i; \
|
||||
\
|
||||
if ( bli_zero_dim1( n ) ) return; \
|
||||
@@ -72,28 +72,65 @@ void PASTEMAC(ch,varname) \
|
||||
); \
|
||||
return; \
|
||||
} \
|
||||
/* If alpha is one, use copyv. */ \
|
||||
else if ( PASTEMAC(ch,eq0)( *alpha ) ) \
|
||||
{ \
|
||||
/* Query the context for the kernel function pointer. */ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \
|
||||
\
|
||||
copyv_p \
|
||||
( \
|
||||
BLIS_NO_CONJUGATE, \
|
||||
n, \
|
||||
x, incx, \
|
||||
y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
chi1 = x; \
|
||||
psi1 = y; \
|
||||
\
|
||||
if ( bli_is_conj( conjx ) ) \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *alpha, *chi1, *psi1 ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *alpha, chi1[i], psi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2js)( *alpha, *chi1, *psi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *alpha, *chi1, *psi1 ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *alpha, chi1[i], psi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scal2s)( *alpha, *chi1, *psi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
@@ -39,14 +39,14 @@
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjalpha, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjalpha, \
|
||||
dim_t n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* chi1; \
|
||||
ctype* restrict chi1; \
|
||||
ctype alpha_conj; \
|
||||
dim_t i; \
|
||||
\
|
||||
@@ -79,11 +79,21 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
chi1 = x; \
|
||||
\
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if ( incx == 1 ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scals)( alpha_conj, *chi1 ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scals)( alpha_conj, chi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,scals)( alpha_conj, *chi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
chi1 += incx; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
@@ -39,14 +39,14 @@
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjalpha, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjalpha, \
|
||||
dim_t n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* chi1; \
|
||||
ctype* restrict chi1; \
|
||||
ctype alpha_conj; \
|
||||
dim_t i; \
|
||||
\
|
||||
@@ -56,22 +56,42 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
if ( PASTEMAC(ch,eq0)( *alpha ) ) \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if ( incx == 1 ) \
|
||||
{ \
|
||||
PASTEMAC(ch,set0s)( *chi1 ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,set0s)( chi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,set0s)( *chi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
chi1 += incx; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
PASTEMAC(ch,copycjs)( conjalpha, *alpha, alpha_conj ); \
|
||||
\
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if ( incx == 1 ) \
|
||||
{ \
|
||||
PASTEMAC(ch,copys)( alpha_conj, *chi1 ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,copys)( alpha_conj, chi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,copys)( alpha_conj, *chi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
chi1 += incx; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
@@ -39,15 +39,15 @@
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* chi1; \
|
||||
ctype* psi1; \
|
||||
ctype* restrict chi1; \
|
||||
ctype* restrict psi1; \
|
||||
dim_t i; \
|
||||
\
|
||||
if ( bli_zero_dim1( n ) ) return; \
|
||||
@@ -57,22 +57,42 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
if ( bli_is_conj( conjx ) ) \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
PASTEMAC(ch,subjs)( *chi1, *psi1 ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,subjs)( chi1[i], psi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,subjs)( *chi1, *psi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
PASTEMAC(ch,subs)( *chi1, *psi1 ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,subs)( chi1[i], psi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,subs)( *chi1, *psi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
@@ -39,14 +39,14 @@
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
dim_t n, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
dim_t n, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* chi1; \
|
||||
ctype* psi1; \
|
||||
ctype* restrict chi1; \
|
||||
ctype* restrict psi1; \
|
||||
dim_t i; \
|
||||
\
|
||||
if ( bli_zero_dim1( n ) ) return; \
|
||||
@@ -54,12 +54,22 @@ void PASTEMAC(ch,varname) \
|
||||
chi1 = x; \
|
||||
psi1 = y; \
|
||||
\
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
PASTEMAC(ch,swaps)( *chi1, *psi1 ); \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,swaps)( chi1[i], psi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,swaps)( *chi1, *psi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
137
frame/1/kernels/bli_xpbyv_ref.c
Normal file
137
frame/1/kernels/bli_xpbyv_ref.c
Normal file
@@ -0,0 +1,137 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict chi1; \
|
||||
ctype* restrict psi1; \
|
||||
dim_t i; \
|
||||
\
|
||||
if ( bli_zero_dim1( n ) ) return; \
|
||||
\
|
||||
/* If beta is zero, use copyv. */ \
|
||||
if ( PASTEMAC(ch,eq0)( *beta ) ) \
|
||||
{ \
|
||||
/* Query the context for the kernel function pointer. */ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \
|
||||
\
|
||||
copyv_p \
|
||||
( \
|
||||
conjx, \
|
||||
n, \
|
||||
x, incx, \
|
||||
y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
return; \
|
||||
} \
|
||||
/* If beta is one, use addv. */ \
|
||||
else if ( PASTEMAC(ch,eq1)( *beta ) ) \
|
||||
{ \
|
||||
/* Query the context for the kernel function pointer. */ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
PASTECH(ch,addv_ft) addv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_ADDV_KER, cntx ); \
|
||||
\
|
||||
addv_p \
|
||||
( \
|
||||
conjx, \
|
||||
n, \
|
||||
x, incx, \
|
||||
y, incy, \
|
||||
cntx \
|
||||
); \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
chi1 = x; \
|
||||
psi1 = y; \
|
||||
\
|
||||
if ( bli_is_conj( conjx ) ) \
|
||||
{ \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,xpbyjs)( chi1[i], *beta, psi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,xpbyjs)( *chi1, *beta, *psi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,xpbys)( chi1[i], *beta, psi1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC(ch,xpbys)( *chi1, *beta, *psi1 ); \
|
||||
\
|
||||
chi1 += incx; \
|
||||
psi1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0( xpbyv_ref )
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
packv_t* packv_cntl;
|
||||
packv_t* packv_cntl = NULL;
|
||||
|
||||
void bli_packv_cntl_init( void )
|
||||
{
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
scalv_t* scalv_cntl;
|
||||
scalv_t* scalv_cntl = NULL;
|
||||
|
||||
void bli_scalv_cntl_init()
|
||||
{
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
unpackv_t* unpackv_cntl;
|
||||
unpackv_t* unpackv_cntl = NULL;
|
||||
|
||||
void bli_unpackv_cntl_init()
|
||||
{
|
||||
|
||||
@@ -50,11 +50,11 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* alpha1, \
|
||||
ctype* alpha2, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* z, inc_t incz, \
|
||||
ctype* restrict alpha1, \
|
||||
ctype* restrict alpha2, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
ctype* restrict z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
@@ -71,10 +71,10 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* alpha, \
|
||||
ctype* a, inc_t inca, inc_t lda, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
@@ -91,11 +91,11 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t m, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* rho, \
|
||||
ctype* z, inc_t incz, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
ctype* restrict rho, \
|
||||
ctype* restrict z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
@@ -112,11 +112,11 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* alpha, \
|
||||
ctype* a, inc_t inca, inc_t lda, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* beta, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
@@ -135,13 +135,13 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* alpha, \
|
||||
ctype* a, inc_t inca, inc_t lda, \
|
||||
ctype* w, inc_t incw, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* beta, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* z, inc_t incz, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict w, inc_t incw, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
ctype* restrict z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
|
||||
@@ -42,15 +42,15 @@
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* alphax, \
|
||||
ctype* alphay, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* restrict alphax, \
|
||||
ctype* restrict alphay, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
ctype* restrict z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( axpy2v_ker_name )
|
||||
@@ -61,15 +61,15 @@ INSERT_GENTPROT_BASIC( axpy2v_ker_name )
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conja, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* alpha, \
|
||||
ctype* a, inc_t inca, inc_t lda, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
conj_t conja, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( axpyf_ker_name )
|
||||
@@ -80,16 +80,16 @@ INSERT_GENTPROT_BASIC( axpyf_ker_name )
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjxt, \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* rho, \
|
||||
ctype* z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjxt, \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
ctype* restrict rho, \
|
||||
ctype* restrict z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( dotaxpyv_ker_name )
|
||||
@@ -100,20 +100,20 @@ INSERT_GENTPROT_BASIC( dotaxpyv_ker_name )
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjat, \
|
||||
conj_t conja, \
|
||||
conj_t conjw, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* alpha, \
|
||||
ctype* a, inc_t inca, inc_t lda, \
|
||||
ctype* w, inc_t incw, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* beta, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjat, \
|
||||
conj_t conja, \
|
||||
conj_t conjw, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict w, inc_t incw, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
ctype* restrict z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( dotxaxpyf_ker_name )
|
||||
@@ -124,16 +124,16 @@ INSERT_GENTPROT_BASIC( dotxaxpyf_ker_name )
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
conj_t conjat, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* alpha, \
|
||||
ctype* a, inc_t inca, inc_t lda, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* beta, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjat, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( dotxf_ker_name )
|
||||
|
||||
@@ -40,15 +40,15 @@
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* alphax, \
|
||||
ctype* alphay, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* restrict alphax, \
|
||||
ctype* restrict alphay, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
ctype* restrict z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
/* Query the context for the kernel function pointer. */ \
|
||||
|
||||
@@ -40,15 +40,15 @@
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conja, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* alpha, \
|
||||
ctype* a, inc_t inca, inc_t lda, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
conj_t conja, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* a1; \
|
||||
|
||||
@@ -40,16 +40,16 @@
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjxt, \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t m, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* rho, \
|
||||
ctype* z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjxt, \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t m, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
ctype* restrict rho, \
|
||||
ctype* restrict z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* one = PASTEMAC(ch,1); \
|
||||
|
||||
@@ -40,20 +40,20 @@
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjat, \
|
||||
conj_t conja, \
|
||||
conj_t conjw, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* alpha, \
|
||||
ctype* a, inc_t inca, inc_t lda, \
|
||||
ctype* w, inc_t incw, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* beta, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjat, \
|
||||
conj_t conja, \
|
||||
conj_t conjw, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict w, inc_t incw, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
ctype* restrict z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
/* A is m x n. */ \
|
||||
|
||||
@@ -40,16 +40,16 @@
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjat, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* alpha, \
|
||||
ctype* a, inc_t inca, inc_t lda, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* beta, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjat, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* a1; \
|
||||
|
||||
@@ -32,129 +32,24 @@
|
||||
|
||||
*/
|
||||
|
||||
// Redefine level-1f kernel API names to induce prototypes.
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t n, \
|
||||
ctype* alpha1, \
|
||||
ctype* alpha2, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
#undef axpy2v_ker_name
|
||||
#define axpy2v_ker_name axpy2v_ref
|
||||
|
||||
INSERT_GENTPROT_BASIC( axpy2v_ref )
|
||||
#undef dotaxpyv_ker_name
|
||||
#define dotaxpyv_ker_name dotaxpyv_ref
|
||||
|
||||
#undef axpyf_ker_name
|
||||
#define axpyf_ker_name axpyf_ref
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conja, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* alpha, \
|
||||
ctype* a, inc_t inca, inc_t lda, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
#undef dotxf_ker_name
|
||||
#define dotxf_ker_name dotxf_ref
|
||||
|
||||
INSERT_GENTPROT_BASIC( axpyf_ref )
|
||||
#undef dotxaxpyf_ker_name
|
||||
#define dotxaxpyf_ker_name dotxaxpyf_ref_var2
|
||||
|
||||
// Include the level-1f kernel API template.
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjxt, \
|
||||
conj_t conjx, \
|
||||
conj_t conjy, \
|
||||
dim_t m, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* rho, \
|
||||
ctype* z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( dotaxpyv_ref )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjat, \
|
||||
conj_t conja, \
|
||||
conj_t conjw, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* alpha, \
|
||||
ctype* a, inc_t inca, inc_t lda, \
|
||||
ctype* w, inc_t incw, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* beta, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( dotxaxpyf_ref_var1 )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjat, \
|
||||
conj_t conja, \
|
||||
conj_t conjw, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* alpha, \
|
||||
ctype* a, inc_t inca, inc_t lda, \
|
||||
ctype* w, inc_t incw, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* beta, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( dotxaxpyf_ref_var2 )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjat, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* alpha, \
|
||||
ctype* a, inc_t inca, inc_t lda, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* beta, \
|
||||
ctype* y, inc_t incy, \
|
||||
cntx_t* cntx \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( dotxf_ref )
|
||||
#include "bli_l1f_ker.h"
|
||||
|
||||
|
||||
@@ -40,20 +40,20 @@
|
||||
\
|
||||
void PASTEMAC(ch,varname) \
|
||||
( \
|
||||
conj_t conjat, \
|
||||
conj_t conja, \
|
||||
conj_t conjw, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* alpha, \
|
||||
ctype* a, inc_t inca, inc_t lda, \
|
||||
ctype* w, inc_t incw, \
|
||||
ctype* x, inc_t incx, \
|
||||
ctype* beta, \
|
||||
ctype* y, inc_t incy, \
|
||||
ctype* z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
conj_t conjat, \
|
||||
conj_t conja, \
|
||||
conj_t conjw, \
|
||||
conj_t conjx, \
|
||||
dim_t m, \
|
||||
dim_t b_n, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a, inc_t inca, inc_t lda, \
|
||||
ctype* restrict w, inc_t incw, \
|
||||
ctype* restrict x, inc_t incx, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict y, inc_t incy, \
|
||||
ctype* restrict z, inc_t incz, \
|
||||
cntx_t* cntx \
|
||||
) \
|
||||
{ \
|
||||
ctype* a1; \
|
||||
@@ -34,10 +34,10 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
packm_t* packm_cntl_row;
|
||||
packm_t* packm_cntl_col;
|
||||
packm_t* packm_cntl_row = NULL;
|
||||
packm_t* packm_cntl_col = NULL;
|
||||
|
||||
packm_t* packm_cntl;
|
||||
packm_t* packm_cntl = NULL;
|
||||
|
||||
void bli_packm_cntl_init()
|
||||
{
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
scalm_t* scalm_cntl;
|
||||
scalm_t* scalm_cntl = NULL;
|
||||
|
||||
void bli_scalm_cntl_init()
|
||||
{
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
unpackm_t* unpackm_cntl;
|
||||
unpackm_t* unpackm_cntl = NULL;
|
||||
|
||||
void bli_unpackm_cntl_init()
|
||||
{
|
||||
|
||||
@@ -39,17 +39,17 @@ extern packm_t* packm_cntl;
|
||||
extern packv_t* packv_cntl;
|
||||
extern unpackv_t* unpackv_cntl;
|
||||
|
||||
gemv_t* gemv_cntl_bs_ke_dot;
|
||||
gemv_t* gemv_cntl_bs_ke_axpy;
|
||||
gemv_t* gemv_cntl_bs_ke_dot = NULL;
|
||||
gemv_t* gemv_cntl_bs_ke_axpy = NULL;
|
||||
|
||||
gemv_t* gemv_cntl_rp_bs_dot;
|
||||
gemv_t* gemv_cntl_rp_bs_axpy;
|
||||
gemv_t* gemv_cntl_rp_bs_dot = NULL;
|
||||
gemv_t* gemv_cntl_rp_bs_axpy = NULL;
|
||||
|
||||
gemv_t* gemv_cntl_cp_bs_dot;
|
||||
gemv_t* gemv_cntl_cp_bs_axpy;
|
||||
gemv_t* gemv_cntl_cp_bs_dot = NULL;
|
||||
gemv_t* gemv_cntl_cp_bs_axpy = NULL;
|
||||
|
||||
gemv_t* gemv_cntl_ge_dot;
|
||||
gemv_t* gemv_cntl_ge_axpy;
|
||||
gemv_t* gemv_cntl_ge_dot = NULL;
|
||||
gemv_t* gemv_cntl_ge_axpy = NULL;
|
||||
|
||||
|
||||
void bli_gemv_cntl_init()
|
||||
|
||||
@@ -38,17 +38,17 @@ extern packm_t* packm_cntl;
|
||||
extern packv_t* packv_cntl;
|
||||
extern unpackm_t* unpackm_cntl;
|
||||
|
||||
ger_t* ger_cntl_bs_ke_row;
|
||||
ger_t* ger_cntl_bs_ke_col;
|
||||
ger_t* ger_cntl_bs_ke_row = NULL;
|
||||
ger_t* ger_cntl_bs_ke_col = NULL;
|
||||
|
||||
ger_t* ger_cntl_rp_bs_row;
|
||||
ger_t* ger_cntl_rp_bs_col;
|
||||
ger_t* ger_cntl_rp_bs_row = NULL;
|
||||
ger_t* ger_cntl_rp_bs_col = NULL;
|
||||
|
||||
ger_t* ger_cntl_cp_bs_row;
|
||||
ger_t* ger_cntl_cp_bs_col;
|
||||
ger_t* ger_cntl_cp_bs_row = NULL;
|
||||
ger_t* ger_cntl_cp_bs_col = NULL;
|
||||
|
||||
ger_t* ger_cntl_ge_row;
|
||||
ger_t* ger_cntl_ge_col;
|
||||
ger_t* ger_cntl_ge_row = NULL;
|
||||
ger_t* ger_cntl_ge_col = NULL;
|
||||
|
||||
|
||||
void bli_ger_cntl_init()
|
||||
|
||||
@@ -44,10 +44,10 @@ extern gemv_t* gemv_cntl_rp_bs_axpy;
|
||||
extern gemv_t* gemv_cntl_cp_bs_dot;
|
||||
extern gemv_t* gemv_cntl_cp_bs_axpy;
|
||||
|
||||
hemv_t* hemv_cntl_bs_ke_lrow_ucol;
|
||||
hemv_t* hemv_cntl_bs_ke_lcol_urow;
|
||||
hemv_t* hemv_cntl_ge_lrow_ucol;
|
||||
hemv_t* hemv_cntl_ge_lcol_urow;
|
||||
hemv_t* hemv_cntl_bs_ke_lrow_ucol = NULL;
|
||||
hemv_t* hemv_cntl_bs_ke_lcol_urow = NULL;
|
||||
hemv_t* hemv_cntl_ge_lrow_ucol = NULL;
|
||||
hemv_t* hemv_cntl_ge_lcol_urow = NULL;
|
||||
|
||||
|
||||
void bli_hemv_cntl_init()
|
||||
|
||||
@@ -43,11 +43,11 @@ extern ger_t* ger_cntl_cp_bs_col;
|
||||
extern ger_t* ger_cntl_bs_ke_row;
|
||||
extern ger_t* ger_cntl_bs_ke_col;
|
||||
|
||||
her_t* her_cntl_bs_ke_lrow_ucol;
|
||||
her_t* her_cntl_bs_ke_lcol_urow;
|
||||
her_t* her_cntl_bs_ke_lrow_ucol = NULL;
|
||||
her_t* her_cntl_bs_ke_lcol_urow = NULL;
|
||||
|
||||
her_t* her_cntl_ge_lrow_ucol;
|
||||
her_t* her_cntl_ge_lcol_urow;
|
||||
her_t* her_cntl_ge_lrow_ucol = NULL;
|
||||
her_t* her_cntl_ge_lcol_urow = NULL;
|
||||
|
||||
|
||||
void bli_her_cntl_init()
|
||||
|
||||
@@ -41,11 +41,11 @@ extern unpackm_t* unpackm_cntl;
|
||||
extern ger_t* ger_cntl_rp_bs_row;
|
||||
extern ger_t* ger_cntl_cp_bs_col;
|
||||
|
||||
her2_t* her2_cntl_bs_ke_lrow_ucol;
|
||||
her2_t* her2_cntl_bs_ke_lcol_urow;
|
||||
her2_t* her2_cntl_bs_ke_lrow_ucol = NULL;
|
||||
her2_t* her2_cntl_bs_ke_lcol_urow = NULL;
|
||||
|
||||
her2_t* her2_cntl_ge_lrow_ucol;
|
||||
her2_t* her2_cntl_ge_lcol_urow;
|
||||
her2_t* her2_cntl_ge_lrow_ucol = NULL;
|
||||
her2_t* her2_cntl_ge_lcol_urow = NULL;
|
||||
|
||||
|
||||
void bli_her2_cntl_init()
|
||||
|
||||
@@ -43,10 +43,10 @@ extern gemv_t* gemv_cntl_rp_bs_axpy;
|
||||
extern gemv_t* gemv_cntl_cp_bs_dot;
|
||||
extern gemv_t* gemv_cntl_cp_bs_axpy;
|
||||
|
||||
trmv_t* trmv_cntl_bs_ke_nrow_tcol;
|
||||
trmv_t* trmv_cntl_bs_ke_ncol_trow;
|
||||
trmv_t* trmv_cntl_ge_nrow_tcol;
|
||||
trmv_t* trmv_cntl_ge_ncol_trow;
|
||||
trmv_t* trmv_cntl_bs_ke_nrow_tcol = NULL;
|
||||
trmv_t* trmv_cntl_bs_ke_ncol_trow = NULL;
|
||||
trmv_t* trmv_cntl_ge_nrow_tcol = NULL;
|
||||
trmv_t* trmv_cntl_ge_ncol_trow = NULL;
|
||||
|
||||
|
||||
void bli_trmv_cntl_init()
|
||||
|
||||
@@ -44,10 +44,10 @@ extern gemv_t* gemv_cntl_rp_bs_axpy;
|
||||
extern gemv_t* gemv_cntl_cp_bs_dot;
|
||||
extern gemv_t* gemv_cntl_cp_bs_axpy;
|
||||
|
||||
trsv_t* trsv_cntl_bs_ke_nrow_tcol;
|
||||
trsv_t* trsv_cntl_bs_ke_ncol_trow;
|
||||
trsv_t* trsv_cntl_ge_nrow_tcol;
|
||||
trsv_t* trsv_cntl_ge_ncol_trow;
|
||||
trsv_t* trsv_cntl_bs_ke_nrow_tcol = NULL;
|
||||
trsv_t* trsv_cntl_bs_ke_ncol_trow = NULL;
|
||||
trsv_t* trsv_cntl_ge_nrow_tcol = NULL;
|
||||
trsv_t* trsv_cntl_ge_ncol_trow = NULL;
|
||||
|
||||
|
||||
void bli_trsv_cntl_init()
|
||||
|
||||
@@ -40,9 +40,8 @@
|
||||
|
||||
void bli_gemm_cntx_init( cntx_t* cntx )
|
||||
{
|
||||
bli_cntx_obj_create( cntx );
|
||||
|
||||
//bli_cntx_obj_clear( cntx );
|
||||
// Clear the context fields.
|
||||
bli_cntx_obj_clear( cntx );
|
||||
|
||||
// Initialize the context with the current architecture's native
|
||||
// level-3 gemm micro-kernel, and its output preferences.
|
||||
@@ -71,16 +70,14 @@ void bli_gemm_cntx_init( cntx_t* cntx )
|
||||
|
||||
void bli_gemm_cntx_finalize( cntx_t* cntx )
|
||||
{
|
||||
bli_cntx_obj_free( cntx );
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_trsm_cntx_init( cntx_t* cntx )
|
||||
{
|
||||
bli_cntx_obj_create( cntx );
|
||||
|
||||
//bli_cntx_obj_clear( cntx );
|
||||
// Clear the context fields.
|
||||
bli_cntx_obj_clear( cntx );
|
||||
|
||||
// Initialize the context with the current architecture's native
|
||||
// level-3 gemm micro-kernel, and its output preferences.
|
||||
@@ -116,6 +113,5 @@ void bli_trsm_cntx_init( cntx_t* cntx )
|
||||
|
||||
void bli_trsm_cntx_finalize( cntx_t* cntx )
|
||||
{
|
||||
bli_cntx_obj_free( cntx );
|
||||
}
|
||||
|
||||
|
||||
@@ -36,15 +36,15 @@
|
||||
|
||||
extern scalm_t* scalm_cntl;
|
||||
|
||||
packm_t* gemm_packa_cntl;
|
||||
packm_t* gemm_packb_cntl;
|
||||
packm_t* gemm_packa_cntl = NULL;
|
||||
packm_t* gemm_packb_cntl = NULL;
|
||||
|
||||
gemm_t* gemm_cntl_bp_ke;
|
||||
gemm_t* gemm_cntl_op_bp;
|
||||
gemm_t* gemm_cntl_mm_op;
|
||||
gemm_t* gemm_cntl_vl_mm;
|
||||
gemm_t* gemm_cntl_bp_ke = NULL;
|
||||
gemm_t* gemm_cntl_op_bp = NULL;
|
||||
gemm_t* gemm_cntl_mm_op = NULL;
|
||||
gemm_t* gemm_cntl_vl_mm = NULL;
|
||||
|
||||
gemm_t* gemm_cntl;
|
||||
gemm_t* gemm_cntl = NULL;
|
||||
|
||||
void bli_gemm_cntl_init()
|
||||
{
|
||||
|
||||
@@ -322,9 +322,11 @@ void PASTEMAC(ch,varname) \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
/*PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: c", MR, NR, c11, rs_c, cs_c, "%4.1f", "" );*/ \
|
||||
/*PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: b1", k, NR, b1, NR, 1, "%4.1f", "" ); \
|
||||
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: a1", MR, k, a1, 1, MR, "%4.1f", "" );*/ \
|
||||
/*
|
||||
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: b1", k, NR, b1, NR, 1, "%4.1f", "" ); \
|
||||
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: a1", MR, k, a1, 1, MR, "%4.1f", "" ); \
|
||||
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: c after", m_cur, n_cur, c11, rs_c, cs_c, "%4.1f", "" ); \
|
||||
*/ \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC0( gemm_ker_var2 )
|
||||
|
||||
@@ -38,24 +38,24 @@ extern scalm_t* scalm_cntl;
|
||||
|
||||
extern gemm_t* gemm_cntl_bp_ke;
|
||||
|
||||
packm_t* trsm_l_packa_cntl;
|
||||
packm_t* trsm_l_packb_cntl;
|
||||
packm_t* trsm_l_packa_cntl = NULL;
|
||||
packm_t* trsm_l_packb_cntl = NULL;
|
||||
|
||||
packm_t* trsm_r_packa_cntl;
|
||||
packm_t* trsm_r_packb_cntl;
|
||||
packm_t* trsm_r_packa_cntl = NULL;
|
||||
packm_t* trsm_r_packb_cntl = NULL;
|
||||
|
||||
trsm_t* trsm_cntl_bp_ke;
|
||||
trsm_t* trsm_cntl_bp_ke = NULL;
|
||||
|
||||
trsm_t* trsm_l_cntl_op_bp;
|
||||
trsm_t* trsm_l_cntl_mm_op;
|
||||
trsm_t* trsm_l_cntl_vl_mm;
|
||||
trsm_t* trsm_l_cntl_op_bp = NULL;
|
||||
trsm_t* trsm_l_cntl_mm_op = NULL;
|
||||
trsm_t* trsm_l_cntl_vl_mm = NULL;
|
||||
|
||||
trsm_t* trsm_r_cntl_op_bp;
|
||||
trsm_t* trsm_r_cntl_mm_op;
|
||||
trsm_t* trsm_r_cntl_vl_mm;
|
||||
trsm_t* trsm_r_cntl_op_bp = NULL;
|
||||
trsm_t* trsm_r_cntl_mm_op = NULL;
|
||||
trsm_t* trsm_r_cntl_vl_mm = NULL;
|
||||
|
||||
trsm_t* trsm_l_cntl;
|
||||
trsm_t* trsm_r_cntl;
|
||||
trsm_t* trsm_l_cntl = NULL;
|
||||
trsm_t* trsm_r_cntl = NULL;
|
||||
|
||||
|
||||
void bli_trsm_cntl_init()
|
||||
|
||||
@@ -56,59 +56,16 @@ void bli_cntx_obj_free( cntx_t* cntx )
|
||||
|
||||
void bli_cntx_obj_clear( cntx_t* cntx )
|
||||
{
|
||||
blksz_t* blkszs = bli_cntx_blkszs_buf( cntx );
|
||||
bszid_t* bmults = bli_cntx_bmults_buf( cntx );
|
||||
func_t* l3_vir_ukrs = bli_cntx_l3_vir_ukrs_buf( cntx );
|
||||
func_t* l3_nat_ukrs = bli_cntx_l3_nat_ukrs_buf( cntx );
|
||||
mbool_t* l3_nat_ukrs_prefs = bli_cntx_l3_nat_ukrs_prefs_buf( cntx );
|
||||
func_t* l1f_kers = bli_cntx_l1f_kers_buf( cntx );
|
||||
func_t* l1v_kers = bli_cntx_l1v_kers_buf( cntx );
|
||||
func_t* packm_ukrs = bli_cntx_packm_ukrs_buf( cntx );
|
||||
|
||||
dim_t i;
|
||||
|
||||
// Initialize all of the elements of every array to a sane initial
|
||||
// value. (Strictly speaking, there is no "null" value for typedef'ed
|
||||
// enums such as bszid_t, so we cheat a little by using 0.)
|
||||
|
||||
func_t null_func = { { NULL, NULL, NULL, NULL } };
|
||||
blksz_t null_blksz = { { 0, 0, 0, 0, } };
|
||||
mbool_t null_mbool = { { FALSE, FALSE, FALSE, FALSE } };
|
||||
bszid_t null_bszid = 0;
|
||||
|
||||
for ( i = 0; i < BLIS_NUM_BLKSZS; ++i )
|
||||
{
|
||||
blkszs[ i ] = null_blksz;
|
||||
}
|
||||
for ( i = 0; i < BLIS_NUM_BLKSZS; ++i )
|
||||
{
|
||||
bmults[ i ] = null_bszid;
|
||||
}
|
||||
for ( i = 0; i < BLIS_NUM_LEVEL3_UKRS; ++i )
|
||||
{
|
||||
l3_vir_ukrs[ i ] = null_func;
|
||||
l3_nat_ukrs[ i ] = null_func;
|
||||
l3_nat_ukrs_prefs[ i ] = null_mbool;
|
||||
}
|
||||
for ( i = 0; i < BLIS_NUM_LEVEL1F_KERS; ++i )
|
||||
{
|
||||
l1f_kers[ i ] = null_func;
|
||||
}
|
||||
for ( i = 0; i < BLIS_NUM_LEVEL1V_KERS; ++i )
|
||||
{
|
||||
l1v_kers[ i ] = null_func;
|
||||
}
|
||||
{
|
||||
packm_ukrs[ 0 ] = null_func;
|
||||
}
|
||||
|
||||
// NOTE: It doesn't make sense to initialize method or schema fields
|
||||
// at this time; the method field would normally be set by _set_blkszs()
|
||||
// and the schema fields are set by _set_pack_schema_[abc]().
|
||||
// Fill the entire cntx_t structure with zeros.
|
||||
// NOTE: The size must be that of the pointed-to structure; sizeof( cntx )
// would only be the size of the pointer and leave most fields uncleared.
memset( ( void* )cntx, 0, sizeof( *cntx ) );
|
||||
}
|
||||
|
||||
void bli_cntx_init( cntx_t* cntx )
|
||||
{
|
||||
// This function initializes a "universal" context that is pre-loaded
|
||||
// with kernel addresses for all level-1v, -1f, and -3 kernels, in
|
||||
// addition to all level-1f and -3 blocksizes.
|
||||
|
||||
bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMM_UKR, cntx );
|
||||
bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMMTRSM_L_UKR, cntx );
|
||||
bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMMTRSM_U_UKR, cntx );
|
||||
@@ -161,6 +118,7 @@ blksz_t* bli_cntx_get_blksz( bszid_t bs_id,
|
||||
return blksz;
|
||||
}
|
||||
|
||||
#if 0
|
||||
dim_t bli_cntx_get_blksz_def_dt( num_t dt,
|
||||
bszid_t bs_id,
|
||||
cntx_t* cntx )
|
||||
@@ -182,6 +140,7 @@ dim_t bli_cntx_get_blksz_max_dt( num_t dt,
|
||||
// Return the default blocksize value for the datatype given.
|
||||
return bli_blksz_get_max( dt, blksz );
|
||||
}
|
||||
#endif
|
||||
|
||||
blksz_t* bli_cntx_get_bmult( bszid_t bs_id,
|
||||
cntx_t* cntx )
|
||||
@@ -196,6 +155,7 @@ blksz_t* bli_cntx_get_bmult( bszid_t bs_id,
|
||||
return bmult;
|
||||
}
|
||||
|
||||
#if 0
|
||||
dim_t bli_cntx_get_bmult_dt( num_t dt,
|
||||
bszid_t bs_id,
|
||||
cntx_t* cntx )
|
||||
@@ -203,21 +163,8 @@ dim_t bli_cntx_get_bmult_dt( num_t dt,
|
||||
blksz_t* bmult = bli_cntx_get_bmult( bs_id, cntx );
|
||||
|
||||
return bli_blksz_get_def( dt, bmult );
|
||||
#if 0
|
||||
blksz_t* blkszs = bli_cntx_blkszs_buf( cntx );
|
||||
bszid_t* bmults = bli_cntx_bmults_buf( cntx );
|
||||
bszid_t bm_id = bmults[ bs_id ];
|
||||
|
||||
// A little hack to ensure we don't try to access a blocksize object
|
||||
// using an uninitialized/garbage value in the bmults array (which
|
||||
// may exist because that blocksize in the context was never set).
|
||||
if ( bm_id < BLIS_BSZID_LO && BLIS_BSZID_HI < bm_id ) return 0;
|
||||
|
||||
blksz_t* bmult = &blkszs[ bm_id ];
|
||||
|
||||
return bli_blksz_get_def( dt, bmult );
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
func_t* bli_cntx_get_l3_ukr( l3ukr_t ukr_id,
|
||||
cntx_t* cntx )
|
||||
@@ -240,6 +187,7 @@ func_t* bli_cntx_get_l3_ukr( l3ukr_t ukr_id,
|
||||
return l3_ukr;
|
||||
}
|
||||
|
||||
#if 0
|
||||
void* bli_cntx_get_l3_ukr_dt( num_t dt,
|
||||
l3ukr_t ukr_id,
|
||||
cntx_t* cntx )
|
||||
@@ -260,6 +208,7 @@ void* bli_cntx_get_l3_ukr_dt( num_t dt,
|
||||
|
||||
return bli_func_get_dt( dt, l3_ukr );
|
||||
}
|
||||
#endif
|
||||
|
||||
func_t* bli_cntx_get_l3_vir_ukr( l3ukr_t ukr_id,
|
||||
cntx_t* cntx )
|
||||
@@ -272,6 +221,7 @@ func_t* bli_cntx_get_l3_vir_ukr( l3ukr_t ukr_id,
|
||||
return l3_vir_ukr;
|
||||
}
|
||||
|
||||
#if 0
|
||||
void* bli_cntx_get_l3_vir_ukr_dt( num_t dt,
|
||||
l3ukr_t ukr_id,
|
||||
cntx_t* cntx )
|
||||
@@ -283,6 +233,7 @@ void* bli_cntx_get_l3_vir_ukr_dt( num_t dt,
|
||||
// identified by ukr_id.
|
||||
return bli_func_get_dt( dt, l3_vir_ukr );
|
||||
}
|
||||
#endif
|
||||
|
||||
func_t* bli_cntx_get_l3_nat_ukr( l3ukr_t ukr_id,
|
||||
cntx_t* cntx )
|
||||
@@ -295,6 +246,7 @@ func_t* bli_cntx_get_l3_nat_ukr( l3ukr_t ukr_id,
|
||||
return l3_nat_ukr;
|
||||
}
|
||||
|
||||
#if 0
|
||||
void* bli_cntx_get_l3_nat_ukr_dt( num_t dt,
|
||||
l3ukr_t ukr_id,
|
||||
cntx_t* cntx )
|
||||
@@ -306,6 +258,7 @@ void* bli_cntx_get_l3_nat_ukr_dt( num_t dt,
|
||||
// identified by ukr_id.
|
||||
return bli_func_get_dt( dt, l3_nat_ukr );
|
||||
}
|
||||
#endif
|
||||
|
||||
func_t* bli_cntx_get_l1f_ker( l1fkr_t ker_id,
|
||||
cntx_t* cntx )
|
||||
@@ -318,6 +271,7 @@ func_t* bli_cntx_get_l1f_ker( l1fkr_t ker_id,
|
||||
return l1f_ker;
|
||||
}
|
||||
|
||||
#if 0
|
||||
void* bli_cntx_get_l1f_ker_dt( num_t dt,
|
||||
l1fkr_t ker_id,
|
||||
cntx_t* cntx )
|
||||
@@ -327,6 +281,7 @@ void* bli_cntx_get_l1f_ker_dt( num_t dt,
|
||||
|
||||
return bli_func_get_dt( dt, l1f_ker );
|
||||
}
|
||||
#endif
|
||||
|
||||
func_t* bli_cntx_get_l1v_ker( l1vkr_t ker_id,
|
||||
cntx_t* cntx )
|
||||
@@ -339,6 +294,7 @@ func_t* bli_cntx_get_l1v_ker( l1vkr_t ker_id,
|
||||
return l1v_ker;
|
||||
}
|
||||
|
||||
#if 0
|
||||
void* bli_cntx_get_l1v_ker_dt( num_t dt,
|
||||
l1vkr_t ker_id,
|
||||
cntx_t* cntx )
|
||||
@@ -348,6 +304,7 @@ void* bli_cntx_get_l1v_ker_dt( num_t dt,
|
||||
|
||||
return bli_func_get_dt( dt, l1v_ker );
|
||||
}
|
||||
#endif
|
||||
|
||||
mbool_t* bli_cntx_get_l3_nat_ukr_prefs( l3ukr_t ukr_id,
|
||||
cntx_t* cntx )
|
||||
@@ -367,6 +324,7 @@ func_t* bli_cntx_get_packm_ukr( cntx_t* cntx )
|
||||
return packm_ukrs;
|
||||
}
|
||||
|
||||
#if 0
|
||||
ind_t bli_cntx_get_ind_method( cntx_t* cntx )
|
||||
{
|
||||
return bli_cntx_method( cntx );
|
||||
@@ -381,6 +339,7 @@ pack_t bli_cntx_get_pack_schema_b( cntx_t* cntx )
|
||||
{
|
||||
return bli_cntx_schema_b( cntx );
|
||||
}
|
||||
#endif
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -66,55 +66,55 @@ typedef struct cntx_s
|
||||
|
||||
#define bli_cntx_blkszs_buf( cntx ) \
|
||||
\
|
||||
( cntx->blkszs )
|
||||
( (cntx)->blkszs )
|
||||
|
||||
#define bli_cntx_bmults_buf( cntx ) \
|
||||
\
|
||||
( cntx->bmults )
|
||||
( (cntx)->bmults )
|
||||
|
||||
#define bli_cntx_l3_vir_ukrs_buf( cntx ) \
|
||||
\
|
||||
( cntx->l3_vir_ukrs )
|
||||
( (cntx)->l3_vir_ukrs )
|
||||
|
||||
#define bli_cntx_l3_nat_ukrs_buf( cntx ) \
|
||||
\
|
||||
( cntx->l3_nat_ukrs )
|
||||
( (cntx)->l3_nat_ukrs )
|
||||
|
||||
#define bli_cntx_l3_nat_ukrs_prefs_buf( cntx ) \
|
||||
\
|
||||
( cntx->l3_nat_ukrs_prefs )
|
||||
( (cntx)->l3_nat_ukrs_prefs )
|
||||
|
||||
#define bli_cntx_l1f_kers_buf( cntx ) \
|
||||
\
|
||||
( cntx->l1f_kers )
|
||||
( (cntx)->l1f_kers )
|
||||
|
||||
#define bli_cntx_l1v_kers_buf( cntx ) \
|
||||
\
|
||||
( cntx->l1v_kers )
|
||||
( (cntx)->l1v_kers )
|
||||
|
||||
#define bli_cntx_packm_ukrs_buf( cntx ) \
|
||||
\
|
||||
(&(cntx->packm_ukrs) )
|
||||
(&((cntx)->packm_ukrs) )
|
||||
|
||||
#define bli_cntx_packm_ukrs( cntx ) \
|
||||
\
|
||||
(&(cntx->packm_ukrs) )
|
||||
(&((cntx)->packm_ukrs) )
|
||||
|
||||
#define bli_cntx_method( cntx ) \
|
||||
\
|
||||
( cntx->method )
|
||||
( (cntx)->method )
|
||||
|
||||
#define bli_cntx_schema_a( cntx ) \
|
||||
\
|
||||
( cntx->schema_a )
|
||||
( (cntx)->schema_a )
|
||||
|
||||
#define bli_cntx_schema_b( cntx ) \
|
||||
\
|
||||
( cntx->schema_b )
|
||||
( (cntx)->schema_b )
|
||||
|
||||
#define bli_cntx_schema_c( cntx ) \
|
||||
\
|
||||
( cntx->schema_c )
|
||||
( (cntx)->schema_c )
|
||||
|
||||
// cntx_t modification (fields only)
|
||||
|
||||
@@ -178,13 +178,99 @@ typedef struct cntx_s
|
||||
(cntx_p)->schema_c = _schema_c; \
|
||||
}
|
||||
|
||||
// cntx_t query (complex)
|
||||
|
||||
#define bli_cntx_get_blksz_def_dt( dt, bs_id, cntx ) \
|
||||
\
|
||||
bli_blksz_get_def \
|
||||
( \
|
||||
(dt), (&(bli_cntx_blkszs_buf( (cntx) ))[ bs_id ]) \
|
||||
)
|
||||
|
||||
#define bli_cntx_get_blksz_max_dt( dt, bs_id, cntx ) \
|
||||
\
|
||||
bli_blksz_get_max \
|
||||
( \
|
||||
(dt), (&(bli_cntx_blkszs_buf( (cntx) ))[ bs_id ]) \
|
||||
)
|
||||
|
||||
#define bli_cntx_get_bmult_dt( dt, bs_id, cntx ) \
|
||||
\
|
||||
bli_blksz_get_def \
|
||||
( \
|
||||
(dt), \
|
||||
(&(bli_cntx_blkszs_buf( (cntx) )) \
|
||||
[ \
|
||||
(bli_cntx_bmults_buf( (cntx) ))[ bs_id ] \
|
||||
]) \
|
||||
)
|
||||
|
||||
#define bli_cntx_get_l3_ukr_dt( dt, ukr_id, cntx ) \
|
||||
\
|
||||
bli_func_get_dt \
|
||||
( \
|
||||
(dt), \
|
||||
&(( \
|
||||
bli_cntx_method( (cntx) ) != BLIS_NAT \
|
||||
? bli_cntx_l3_vir_ukrs_buf( (cntx) ) \
|
||||
: bli_cntx_l3_nat_ukrs_buf( (cntx) ) \
|
||||
)[ ukr_id ]) \
|
||||
)
|
||||
|
||||
#define bli_cntx_get_l3_vir_ukr_dt( dt, ukr_id, cntx ) \
|
||||
\
|
||||
bli_func_get_dt \
|
||||
( \
|
||||
(dt), (&(bli_cntx_l3_vir_ukrs_buf( (cntx) ))[ ukr_id ]) \
|
||||
)
|
||||
|
||||
#define bli_cntx_get_l3_nat_ukr_dt( dt, ukr_id, cntx ) \
|
||||
\
|
||||
bli_func_get_dt \
|
||||
( \
|
||||
(dt), (&(bli_cntx_l3_nat_ukrs_buf( (cntx) ))[ ukr_id ]) \
|
||||
)
|
||||
|
||||
#define bli_cntx_get_l1f_ker_dt( dt, ker_id, cntx ) \
|
||||
\
|
||||
bli_func_get_dt \
|
||||
( \
|
||||
(dt), (&(bli_cntx_l1f_kers_buf( (cntx) ))[ ker_id ]) \
|
||||
)
|
||||
|
||||
#define bli_cntx_get_l1v_ker_dt( dt, ker_id, cntx ) \
|
||||
\
|
||||
bli_func_get_dt \
|
||||
( \
|
||||
(dt), (&(bli_cntx_l1v_kers_buf( (cntx) ))[ ker_id ]) \
|
||||
)
|
||||
|
||||
#define bli_cntx_get_l3_nat_ukr_prefs_dt( dt, ukr_id, cntx ) \
|
||||
\
|
||||
bli_mbool_get_dt \
|
||||
( \
|
||||
(dt), (&(bli_cntx_l3_nat_ukrs_prefs_buf( (cntx) ))[ ukr_id ]) \
|
||||
)
|
||||
|
||||
#define bli_cntx_get_ind_method( cntx ) \
|
||||
\
|
||||
bli_cntx_method( cntx )
|
||||
|
||||
#define bli_cntx_get_pack_schema_a( cntx ) \
|
||||
\
|
||||
bli_cntx_schema_a( cntx )
|
||||
|
||||
#define bli_cntx_get_pack_schema_b( cntx ) \
|
||||
\
|
||||
bli_cntx_schema_b( cntx )
|
||||
|
||||
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// create/free
|
||||
|
||||
//void bli_cntx_obj_create( cntx_t* cntx );
|
||||
//void bli_cntx_obj_copy( cntx_t* src,
|
||||
// cntx_t* dst );
|
||||
//void bli_cntx_obj_free( cntx_t* cntx );
|
||||
void bli_cntx_obj_clear( cntx_t* cntx );
|
||||
void bli_cntx_init( cntx_t* cntx );
|
||||
@@ -193,49 +279,53 @@ void bli_cntx_init( cntx_t* cntx );
|
||||
|
||||
blksz_t* bli_cntx_get_blksz( bszid_t bs_id,
|
||||
cntx_t* cntx );
|
||||
dim_t bli_cntx_get_blksz_def_dt( num_t dt,
|
||||
bszid_t bs_id,
|
||||
cntx_t* cntx );
|
||||
dim_t bli_cntx_get_blksz_max_dt( num_t dt,
|
||||
bszid_t bs_id,
|
||||
cntx_t* cntx );
|
||||
blksz_t* bli_cntx_get_bmult( bszid_t bs_id,
|
||||
cntx_t* cntx );
|
||||
dim_t bli_cntx_get_bmult_dt( num_t dt,
|
||||
bszid_t bs_id,
|
||||
cntx_t* cntx );
|
||||
func_t* bli_cntx_get_l3_ukr( l3ukr_t ukr_id,
|
||||
cntx_t* cntx );
|
||||
void* bli_cntx_get_l3_ukr_dt( num_t dt,
|
||||
l3ukr_t ukr_id,
|
||||
cntx_t* cntx );
|
||||
func_t* bli_cntx_get_l3_vir_ukr( l3ukr_t ukr_id,
|
||||
cntx_t* cntx );
|
||||
void* bli_cntx_get_l3_vir_ukr_dt( num_t dt,
|
||||
l3ukr_t ukr_id,
|
||||
cntx_t* cntx );
|
||||
func_t* bli_cntx_get_l3_nat_ukr( l3ukr_t ukr_id,
|
||||
cntx_t* cntx );
|
||||
void* bli_cntx_get_l3_nat_ukr_dt( num_t dt,
|
||||
l3ukr_t ukr_id,
|
||||
cntx_t* cntx );
|
||||
mbool_t* bli_cntx_get_l3_nat_ukr_prefs( l3ukr_t ukr_id,
|
||||
cntx_t* cntx );
|
||||
func_t* bli_cntx_get_l1f_ker( l1fkr_t ker_id,
|
||||
cntx_t* cntx );
|
||||
void* bli_cntx_get_l1f_ker_dt( num_t dt,
|
||||
l1fkr_t ker_id,
|
||||
cntx_t* cntx );
|
||||
func_t* bli_cntx_get_l1v_ker( l1vkr_t ker_id,
|
||||
cntx_t* cntx );
|
||||
void* bli_cntx_get_l1v_ker_dt( num_t dt,
|
||||
l1vkr_t ker_id,
|
||||
cntx_t* cntx );
|
||||
func_t* bli_cntx_get_packm_ukr( cntx_t* cntx );
|
||||
ind_t bli_cntx_get_ind_method( cntx_t* cntx );
|
||||
pack_t bli_cntx_get_pack_schema_a( cntx_t* cntx );
|
||||
pack_t bli_cntx_get_pack_schema_b( cntx_t* cntx );
|
||||
pack_t bli_cntx_get_pack_schema_c( cntx_t* cntx );
|
||||
|
||||
//dim_t bli_cntx_get_blksz_def_dt( num_t dt,
|
||||
// bszid_t bs_id,
|
||||
// cntx_t* cntx );
|
||||
//dim_t bli_cntx_get_blksz_max_dt( num_t dt,
|
||||
// bszid_t bs_id,
|
||||
// cntx_t* cntx );
|
||||
//dim_t bli_cntx_get_bmult_dt( num_t dt,
|
||||
// bszid_t bs_id,
|
||||
// cntx_t* cntx );
|
||||
//void* bli_cntx_get_l3_ukr_dt( num_t dt,
|
||||
// l3ukr_t ukr_id,
|
||||
// cntx_t* cntx );
|
||||
//void* bli_cntx_get_l3_vir_ukr_dt( num_t dt,
|
||||
// l3ukr_t ukr_id,
|
||||
// cntx_t* cntx );
|
||||
//void* bli_cntx_get_l3_nat_ukr_dt( num_t dt,
|
||||
// l3ukr_t ukr_id,
|
||||
// cntx_t* cntx );
|
||||
//bool_t bli_cntx_get_l3_nat_ukr_prefs_dt( num_t dt,
|
||||
// l3ukr_t ukr_id,
|
||||
// cntx_t* cntx );
|
||||
//void* bli_cntx_get_l1f_ker_dt( num_t dt,
|
||||
// l1fkr_t ker_id,
|
||||
// cntx_t* cntx );
|
||||
//void* bli_cntx_get_l1v_ker_dt( num_t dt,
|
||||
// l1vkr_t ker_id,
|
||||
// cntx_t* cntx );
|
||||
//ind_t bli_cntx_get_ind_method( cntx_t* cntx );
|
||||
//pack_t bli_cntx_get_pack_schema_a( cntx_t* cntx );
|
||||
//pack_t bli_cntx_get_pack_schema_b( cntx_t* cntx );
|
||||
//pack_t bli_cntx_get_pack_schema_c( cntx_t* cntx );
|
||||
|
||||
// set functions
|
||||
|
||||
@@ -294,10 +384,17 @@ void bli_cntx_print( cntx_t* cntx );
|
||||
|
||||
// Preprocess out these calls entirely, since they are currently just empty
|
||||
// functions that do nothing.
|
||||
//#define bli_cntx_obj_create( cntx ) { bli_cntx_obj_clear( cntx ); }
|
||||
//#define bli_cntx_obj_free( cntx ) { bli_cntx_obj_clear( cntx ); }
|
||||
#define bli_cntx_obj_create( cntx ) { ; }
|
||||
#define bli_cntx_obj_free( cntx ) { ; }
|
||||
#if 0
|
||||
#define bli_cntx_obj_create( cntx ) { bli_cntx_obj_clear( cntx ); }
|
||||
#define bli_cntx_obj_free( cntx ) { bli_cntx_obj_clear( cntx ); }
|
||||
#else
|
||||
#define bli_cntx_obj_create( cntx ) { ; }
|
||||
#define bli_cntx_obj_free( cntx ) { ; }
|
||||
#endif
|
||||
|
||||
// These macros initialize/finalize a local context if the given context
|
||||
// pointer is NULL. When initializing, the context address that should
|
||||
// be used (local or external) is assigned to cntx_p.
|
||||
|
||||
#define bli_cntx_init_local_if( opname, cntx, cntx_p ) \
|
||||
\
|
||||
|
||||
@@ -34,13 +34,13 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
obj_t BLIS_TWO;
|
||||
obj_t BLIS_ONE;
|
||||
obj_t BLIS_ONE_HALF;
|
||||
obj_t BLIS_ZERO;
|
||||
obj_t BLIS_MINUS_ONE_HALF;
|
||||
obj_t BLIS_MINUS_ONE;
|
||||
obj_t BLIS_MINUS_TWO;
|
||||
obj_t BLIS_TWO = {};
|
||||
obj_t BLIS_ONE = {};
|
||||
obj_t BLIS_ONE_HALF = {};
|
||||
obj_t BLIS_ZERO = {};
|
||||
obj_t BLIS_MINUS_ONE_HALF = {};
|
||||
obj_t BLIS_MINUS_ONE = {};
|
||||
obj_t BLIS_MINUS_TWO = {};
|
||||
|
||||
static bool_t bli_const_is_init = FALSE;
|
||||
|
||||
|
||||
@@ -35,11 +35,11 @@
|
||||
#include "blis.h"
|
||||
|
||||
|
||||
char *bli_optarg;
|
||||
char *bli_optarg = NULL;
|
||||
int bli_optind = 1;
|
||||
|
||||
int bli_opterr = 0;
|
||||
int bli_optopt;
|
||||
int bli_optopt = 0;
|
||||
|
||||
static char OPT_MARKER = '-';
|
||||
|
||||
|
||||
@@ -754,6 +754,9 @@ static func_t bli_gks_l1v_kers[BLIS_NUM_LEVEL1V_KERS] =
|
||||
/* addv */ { { BLIS_SADDV_KERNEL, BLIS_CADDV_KERNEL,
|
||||
BLIS_DADDV_KERNEL, BLIS_ZADDV_KERNEL, }
|
||||
},
|
||||
/* axpbyv */ { { BLIS_SAXPBYV_KERNEL, BLIS_CAXPBYV_KERNEL,
|
||||
BLIS_DAXPBYV_KERNEL, BLIS_ZAXPBYV_KERNEL, }
|
||||
},
|
||||
/* axpyv */ { { BLIS_SAXPYV_KERNEL, BLIS_CAXPYV_KERNEL,
|
||||
BLIS_DAXPYV_KERNEL, BLIS_ZAXPYV_KERNEL, }
|
||||
},
|
||||
@@ -784,6 +787,9 @@ static func_t bli_gks_l1v_kers[BLIS_NUM_LEVEL1V_KERS] =
|
||||
/* swapv */ { { BLIS_SSWAPV_KERNEL, BLIS_CSWAPV_KERNEL,
|
||||
BLIS_DSWAPV_KERNEL, BLIS_ZSWAPV_KERNEL, }
|
||||
},
|
||||
/* xpbyv */ { { BLIS_SXPBYV_KERNEL, BLIS_CXPBYV_KERNEL,
|
||||
BLIS_DXPBYV_KERNEL, BLIS_ZXPBYV_KERNEL, }
|
||||
},
|
||||
};
|
||||
|
||||
static func_t bli_gks_l1v_ref_kers[BLIS_NUM_LEVEL1V_KERS] =
|
||||
@@ -792,6 +798,9 @@ static func_t bli_gks_l1v_ref_kers[BLIS_NUM_LEVEL1V_KERS] =
|
||||
/* addv */ { { BLIS_SADDV_KERNEL_REF, BLIS_CADDV_KERNEL_REF,
|
||||
BLIS_DADDV_KERNEL_REF, BLIS_ZADDV_KERNEL_REF, }
|
||||
},
|
||||
/* axpbyv */ { { BLIS_SAXPBYV_KERNEL_REF, BLIS_CAXPBYV_KERNEL_REF,
|
||||
BLIS_DAXPBYV_KERNEL_REF, BLIS_ZAXPBYV_KERNEL_REF, }
|
||||
},
|
||||
/* axpyv */ { { BLIS_SAXPYV_KERNEL_REF, BLIS_CAXPYV_KERNEL_REF,
|
||||
BLIS_DAXPYV_KERNEL_REF, BLIS_ZAXPYV_KERNEL_REF, }
|
||||
},
|
||||
@@ -822,6 +831,9 @@ static func_t bli_gks_l1v_ref_kers[BLIS_NUM_LEVEL1V_KERS] =
|
||||
/* swapv */ { { BLIS_SSWAPV_KERNEL_REF, BLIS_CSWAPV_KERNEL_REF,
|
||||
BLIS_DSWAPV_KERNEL_REF, BLIS_ZSWAPV_KERNEL_REF, }
|
||||
},
|
||||
/* xpbyv */ { { BLIS_SXPBYV_KERNEL_REF, BLIS_CXPBYV_KERNEL_REF,
|
||||
BLIS_DXPBYV_KERNEL_REF, BLIS_ZXPBYV_KERNEL_REF, }
|
||||
},
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
@@ -36,10 +36,10 @@
|
||||
|
||||
static bool_t bli_thread_is_init = FALSE;
|
||||
|
||||
packm_thrinfo_t BLIS_PACKM_SINGLE_THREADED;
|
||||
gemm_thrinfo_t BLIS_GEMM_SINGLE_THREADED;
|
||||
herk_thrinfo_t BLIS_HERK_SINGLE_THREADED;
|
||||
thread_comm_t BLIS_SINGLE_COMM;
|
||||
packm_thrinfo_t BLIS_PACKM_SINGLE_THREADED = {};
|
||||
gemm_thrinfo_t BLIS_GEMM_SINGLE_THREADED = {};
|
||||
herk_thrinfo_t BLIS_HERK_SINGLE_THREADED = {};
|
||||
thread_comm_t BLIS_SINGLE_COMM = {};
|
||||
|
||||
void bli_thread_init( void )
|
||||
{
|
||||
|
||||
@@ -43,8 +43,8 @@
|
||||
\
|
||||
f77_int PASTEF772(i,chx,blasname) \
|
||||
( \
|
||||
f77_int* n, \
|
||||
ftype_x* x, f77_int* incx \
|
||||
const f77_int* n, \
|
||||
const ftype_x* x, const f77_int* incx \
|
||||
) \
|
||||
{ \
|
||||
dim_t n0; \
|
||||
@@ -68,7 +68,7 @@ f77_int PASTEF772(i,chx,blasname) \
|
||||
\
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */ \
|
||||
bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \
|
||||
bli_convert_blas_incv( n0, (ftype_x*)x, *incx, x0, incx0 ); \
|
||||
\
|
||||
/* Call BLIS interface. */ \
|
||||
PASTEMAC(chx,blisname) \
|
||||
|
||||
@@ -41,8 +41,8 @@
|
||||
\
|
||||
f77_int PASTEF772(i,chx,blasname) \
|
||||
( \
|
||||
f77_int* n, \
|
||||
ftype_x* x, f77_int* incx \
|
||||
const f77_int* n, \
|
||||
const ftype_x* x, const f77_int* incx \
|
||||
);
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS2BLIS
|
||||
|
||||
@@ -43,8 +43,8 @@
|
||||
\
|
||||
ftype_r PASTEF772(chr,chx,blasname) \
|
||||
( \
|
||||
f77_int* n, \
|
||||
ftype_x* x, f77_int* incx \
|
||||
const f77_int* n, \
|
||||
const ftype_x* x, const f77_int* incx \
|
||||
) \
|
||||
{ \
|
||||
dim_t n0; \
|
||||
@@ -61,7 +61,7 @@ ftype_r PASTEF772(chr,chx,blasname) \
|
||||
\
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */ \
|
||||
bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \
|
||||
bli_convert_blas_incv( n0, (ftype_x*)x, *incx, x0, incx0 ); \
|
||||
\
|
||||
/* Call BLIS interface. */ \
|
||||
PASTEMAC(chx,blisname) \
|
||||
|
||||
@@ -41,8 +41,8 @@
|
||||
\
|
||||
ftype_r PASTEF772(chr,chx,blasname) \
|
||||
( \
|
||||
f77_int* n, \
|
||||
ftype_x* x, f77_int* incx \
|
||||
const f77_int* n, \
|
||||
const ftype_x* x, const f77_int* incx \
|
||||
);
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS2BLIS
|
||||
|
||||
@@ -43,10 +43,10 @@
|
||||
\
|
||||
void PASTEF77(ch,blasname) \
|
||||
( \
|
||||
f77_int* n, \
|
||||
ftype* alpha, \
|
||||
ftype* x, f77_int* incx, \
|
||||
ftype* y, f77_int* incy \
|
||||
const f77_int* n, \
|
||||
const ftype* alpha, \
|
||||
const ftype* x, const f77_int* incx, \
|
||||
ftype* y, const f77_int* incy \
|
||||
) \
|
||||
{ \
|
||||
dim_t n0; \
|
||||
@@ -64,15 +64,15 @@ void PASTEF77(ch,blasname) \
|
||||
\
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */ \
|
||||
bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \
|
||||
bli_convert_blas_incv( n0, y, *incy, y0, incy0 ); \
|
||||
bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
|
||||
bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
|
||||
\
|
||||
/* Call BLIS interface. */ \
|
||||
PASTEMAC(ch,blisname) \
|
||||
( \
|
||||
BLIS_NO_CONJUGATE, \
|
||||
n0, \
|
||||
alpha, \
|
||||
(ftype*)alpha, \
|
||||
x0, incx0, \
|
||||
y0, incy0, \
|
||||
NULL \
|
||||
|
||||
@@ -41,10 +41,10 @@
|
||||
\
|
||||
void PASTEF77(ch,blasname) \
|
||||
( \
|
||||
f77_int* n, \
|
||||
ftype* alpha, \
|
||||
ftype* x, f77_int* incx, \
|
||||
ftype* y, f77_int* incy \
|
||||
const f77_int* n, \
|
||||
const ftype* alpha, \
|
||||
const ftype* x, const f77_int* incx, \
|
||||
ftype* y, const f77_int* incy \
|
||||
);
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS2BLIS
|
||||
|
||||
@@ -43,9 +43,9 @@
|
||||
\
|
||||
void PASTEF77(ch,blasname) \
|
||||
( \
|
||||
f77_int* n, \
|
||||
ftype* x, f77_int* incx, \
|
||||
ftype* y, f77_int* incy \
|
||||
const f77_int* n, \
|
||||
const ftype* x, const f77_int* incx, \
|
||||
ftype* y, const f77_int* incy \
|
||||
) \
|
||||
{ \
|
||||
dim_t n0; \
|
||||
@@ -63,8 +63,8 @@ void PASTEF77(ch,blasname) \
|
||||
\
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */ \
|
||||
bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \
|
||||
bli_convert_blas_incv( n0, y, *incy, y0, incy0 ); \
|
||||
bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
|
||||
bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
|
||||
\
|
||||
/* Call BLIS interface. */ \
|
||||
PASTEMAC(ch,blisname) \
|
||||
|
||||
@@ -41,9 +41,9 @@
|
||||
\
|
||||
void PASTEF77(ch,blasname) \
|
||||
( \
|
||||
f77_int* n, \
|
||||
ftype* x, f77_int* incx, \
|
||||
ftype* y, f77_int* incy \
|
||||
const f77_int* n, \
|
||||
const ftype* x, const f77_int* incx, \
|
||||
ftype* y, const f77_int* incy \
|
||||
);
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS2BLIS
|
||||
|
||||
@@ -43,9 +43,9 @@
|
||||
\
|
||||
ftype PASTEF772(chxy,blasname,chc) \
|
||||
( \
|
||||
f77_int* n, \
|
||||
ftype* x, f77_int* incx, \
|
||||
ftype* y, f77_int* incy \
|
||||
const f77_int* n, \
|
||||
const ftype* x, const f77_int* incx, \
|
||||
const ftype* y, const f77_int* incy \
|
||||
) \
|
||||
{ \
|
||||
dim_t n0; \
|
||||
@@ -64,8 +64,8 @@ ftype PASTEF772(chxy,blasname,chc) \
|
||||
\
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */ \
|
||||
bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \
|
||||
bli_convert_blas_incv( n0, y, *incy, y0, incy0 ); \
|
||||
bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
|
||||
bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
|
||||
\
|
||||
/* Call BLIS interface. */ \
|
||||
PASTEMAC(chxy,blisname) \
|
||||
@@ -95,9 +95,9 @@ INSERT_GENTFUNCDOT_BLAS( dot, dotv )
|
||||
// with result returned in single precision.
|
||||
float PASTEF77(sd,sdot)
|
||||
(
|
||||
f77_int* n,
|
||||
float* x, f77_int* incx,
|
||||
float* y, f77_int* incy
|
||||
const f77_int* n,
|
||||
const float* x, const f77_int* incx,
|
||||
const float* y, const f77_int* incy
|
||||
)
|
||||
{
|
||||
return ( float )PASTEF77(d,sdot)( n,
|
||||
@@ -109,9 +109,9 @@ float PASTEF77(sd,sdot)
|
||||
// with result returned in double precision.
|
||||
double PASTEF77(d,sdot)
|
||||
(
|
||||
f77_int* n,
|
||||
float* x, f77_int* incx,
|
||||
float* y, f77_int* incy
|
||||
const f77_int* n,
|
||||
const float* x, const f77_int* incx,
|
||||
const float* y, const f77_int* incy
|
||||
)
|
||||
{
|
||||
dim_t n0;
|
||||
@@ -129,8 +129,8 @@ double PASTEF77(d,sdot)
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
bli_convert_blas_incv( n0, x, *incx, x0, incx0 );
|
||||
bli_convert_blas_incv( n0, y, *incy, y0, incy0 );
|
||||
bli_convert_blas_incv( n0, (float*)x, *incx, x0, incx0 );
|
||||
bli_convert_blas_incv( n0, (float*)y, *incy, y0, incy0 );
|
||||
|
||||
rho = 0.0;
|
||||
|
||||
|
||||
@@ -41,9 +41,9 @@
|
||||
\
|
||||
ftype PASTEF772(chxy,blasname,chc) \
|
||||
( \
|
||||
f77_int* n, \
|
||||
ftype* x, f77_int* incx, \
|
||||
ftype* y, f77_int* incy \
|
||||
const f77_int* n, \
|
||||
const ftype* x, const f77_int* incx, \
|
||||
const ftype* y, const f77_int* incy \
|
||||
);
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS2BLIS
|
||||
@@ -54,15 +54,15 @@ INSERT_GENTPROTDOT_BLAS( dot )
|
||||
|
||||
float PASTEF77(sd,sdot)
|
||||
(
|
||||
f77_int* n,
|
||||
float* x, f77_int* incx,
|
||||
float* y, f77_int* incy
|
||||
const f77_int* n,
|
||||
const float* x, const f77_int* incx,
|
||||
const float* y, const f77_int* incy
|
||||
);
|
||||
|
||||
double PASTEF77(d,sdot)
|
||||
(
|
||||
f77_int* n,
|
||||
float* x, f77_int* incx,
|
||||
float* y, f77_int* incy
|
||||
const f77_int* n,
|
||||
const float* x, const f77_int* incx,
|
||||
const float* y, const f77_int* incy
|
||||
);
|
||||
#endif
|
||||
|
||||
@@ -43,16 +43,16 @@
|
||||
\
|
||||
void PASTEF77(ch,blasname) \
|
||||
( \
|
||||
f77_char* transa, \
|
||||
f77_char* transb, \
|
||||
f77_int* m, \
|
||||
f77_int* n, \
|
||||
f77_int* k, \
|
||||
ftype* alpha, \
|
||||
ftype* a, f77_int* lda, \
|
||||
ftype* b, f77_int* ldb, \
|
||||
ftype* beta, \
|
||||
ftype* c, f77_int* ldc \
|
||||
const f77_char* transa, \
|
||||
const f77_char* transb, \
|
||||
const f77_int* m, \
|
||||
const f77_int* n, \
|
||||
const f77_int* k, \
|
||||
const ftype* alpha, \
|
||||
const ftype* a, const f77_int* lda, \
|
||||
const ftype* b, const f77_int* ldb, \
|
||||
const ftype* beta, \
|
||||
ftype* c, const f77_int* ldc \
|
||||
) \
|
||||
{ \
|
||||
trans_t blis_transa; \
|
||||
@@ -106,11 +106,11 @@ void PASTEF77(ch,blasname) \
|
||||
m0, \
|
||||
n0, \
|
||||
k0, \
|
||||
alpha, \
|
||||
a, rs_a, cs_a, \
|
||||
b, rs_b, cs_b, \
|
||||
beta, \
|
||||
c, rs_c, cs_c, \
|
||||
(ftype*)alpha, \
|
||||
(ftype*)a, rs_a, cs_a, \
|
||||
(ftype*)b, rs_b, cs_b, \
|
||||
(ftype*)beta, \
|
||||
(ftype*)c, rs_c, cs_c, \
|
||||
NULL \
|
||||
); \
|
||||
\
|
||||
|
||||
@@ -41,16 +41,16 @@
|
||||
\
|
||||
void PASTEF77(ch,blasname) \
|
||||
( \
|
||||
f77_char* transa, \
|
||||
f77_char* transb, \
|
||||
f77_int* m, \
|
||||
f77_int* n, \
|
||||
f77_int* k, \
|
||||
ftype* alpha, \
|
||||
ftype* a, f77_int* lda, \
|
||||
ftype* b, f77_int* ldb, \
|
||||
ftype* beta, \
|
||||
ftype* c, f77_int* ldc \
|
||||
const f77_char* transa, \
|
||||
const f77_char* transb, \
|
||||
const f77_int* m, \
|
||||
const f77_int* n, \
|
||||
const f77_int* k, \
|
||||
const ftype* alpha, \
|
||||
const ftype* a, const f77_int* lda, \
|
||||
const ftype* b, const f77_int* ldb, \
|
||||
const ftype* beta, \
|
||||
ftype* c, const f77_int* ldc \
|
||||
);
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS2BLIS
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user