Merge branch 'master' into knl

This commit is contained in:
Devin Matthews
2016-05-10 11:49:47 -05:00
438 changed files with 9508 additions and 5287 deletions

1
.gitignore vendored
View File

@@ -29,6 +29,7 @@
# -- build system files --
config.mk
bli_config.h
# -- makefile fragments --

View File

@@ -257,7 +257,7 @@ endif
# Expand the fragment paths that contain .h files to attain the set of header
# files present in all fragment paths.
# Collect every .h file found directly in the current directory (".") and in
# each fragment directory.
# NOTE(review): "." is presumably searched so that a generated bli_config.h
# placed in the build directory is picked up -- confirm against configure.
MK_HEADER_FILES := $(foreach frag_path, . $(FRAGMENT_DIR_PATHS), \
                       $(wildcard $(frag_path)/*.h))
# Strip the leading, internal, and trailing whitespace from our list of header
@@ -268,7 +268,7 @@ MK_HEADER_FILES := $(strip $(MK_HEADER_FILES))
# expansion. Then, strip the header filename to leave the path to each header
# location. Notice this process even weeds out duplicates! Add the config
# directory manually since it contains bli_config.h.
# For the current directory (".") and each fragment directory, take the first
# .h file found there and keep only its directory part. Directories without
# headers contribute nothing, so the result is one path per header directory.
MK_HEADER_DIR_PATHS := $(dir $(foreach frag_path, . $(FRAGMENT_DIR_PATHS), \
                           $(firstword $(wildcard $(frag_path)/*.h))))
# Add -I to each header path so we can specify our include search paths to the
@@ -678,11 +678,11 @@ endif
# Remove the test suite's object files (*.o, *.pexe) and the test suite
# binary. Depends on check-env.
#
# The two -name tests must be grouped and joined with -o: find ANDs adjacent
# tests, and no file name matches both "*.o" and "*.pexe", so an ungrouped
# expression would never select anything.
#
# The leading "-" on each removal lets the clean continue when a command
# fails. The verbose branch echoes the commands themselves; the quiet branch
# prints a short message and hides the commands with "@".
cleantest: check-env
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
	- $(FIND) $(BASE_OBJ_TESTSUITE_PATH) \( -name "*.o" -o -name "*.pexe" \) | $(XARGS) $(RM_F)
	- $(RM_RF) $(TESTSUITE_BIN)
else
	@echo "Removing object files from $(BASE_OBJ_TESTSUITE_PATH)."
	@- $(FIND) $(BASE_OBJ_TESTSUITE_PATH) \( -name "*.o" -o -name "*.pexe" \) | $(XARGS) $(RM_F)
	@echo "Removing $(TESTSUITE_BIN) binary."
	@- $(RM_RF) $(TESTSUITE_BIN)
endif

View File

@@ -1,6 +1,6 @@
/*
BLIS
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
@@ -35,11 +35,48 @@
// Build-configuration header template.
// NOTE(review): the @name@ tokens look like placeholders that are replaced
// with concrete values when this template is instantiated -- confirm against
// the configure script. Each one must expand to something valid inside an
// #if expression.
#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H
// Threading: define BLIS_ENABLE_PTHREADS when @enable_pthreads@ is nonzero.
#if @enable_pthreads@
#define BLIS_ENABLE_PTHREADS
#endif
// SIMD alignment value, fixed at 16 here.
#define BLIS_SIMD_ALIGN_SIZE 16
// Threading: define BLIS_ENABLE_OPENMP when @enable_openmp@ is nonzero.
#if @enable_openmp@
#define BLIS_ENABLE_OPENMP
#endif
// Integer size: only the values 64 and 32 define BLIS_INT_TYPE_SIZE; any
// other value leaves the macro undefined.
#if @int_type_size@ == 64
#define BLIS_INT_TYPE_SIZE 64
#elif @int_type_size@ == 32
#define BLIS_INT_TYPE_SIZE 32
#else
// determine automatically
#endif
// Same scheme for BLIS_BLAS2BLIS_INT_TYPE_SIZE: 64 or 32 is recorded, anything
// else leaves the macro undefined.
#if @blas2blis_int_type_size@ == 64
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 64
#elif @blas2blis_int_type_size@ == 32
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
#else
// determine automatically
#endif
// BLAS2BLIS switch: skipped entirely when either BLIS_ENABLE_BLAS2BLIS or
// BLIS_DISABLE_BLAS2BLIS is already defined before this point; otherwise
// exactly one of the two is defined from @enable_blas2blis@.
#ifndef BLIS_ENABLE_BLAS2BLIS
#ifndef BLIS_DISABLE_BLAS2BLIS
#if @enable_blas2blis@
#define BLIS_ENABLE_BLAS2BLIS
#else
#define BLIS_DISABLE_BLAS2BLIS
#endif
#endif
#endif
// CBLAS switch: same pattern as above, driven by @enable_cblas@ and likewise
// skipped when BLIS_ENABLE_CBLAS or BLIS_DISABLE_CBLAS is already defined.
#ifndef BLIS_ENABLE_CBLAS
#ifndef BLIS_DISABLE_CBLAS
#if @enable_cblas@
#define BLIS_ENABLE_CBLAS
#else
#define BLIS_DISABLE_CBLAS
#endif
#endif
#endif
#endif

View File

@@ -156,11 +156,11 @@ ifeq ($(THREADING_MODEL),auto)
THREADING_MODEL := omp
endif
# OpenMP: pass -fopenmp to both the compile step (CTHREADFLAGS) and the link
# step (LDFLAGS).
ifeq ($(THREADING_MODEL),omp)
CTHREADFLAGS := -fopenmp
LDFLAGS += -fopenmp
endif
# POSIX threads: compile with -pthread and link against libpthread.
ifeq ($(THREADING_MODEL),pthreads)
CTHREADFLAGS := -pthread
LDFLAGS += -lpthread
endif
endif
@@ -170,11 +170,11 @@ ifeq ($(THREADING_MODEL),auto)
THREADING_MODEL := omp
endif
# OpenMP: this toolchain spells the option -openmp; it is passed to both the
# compile step (CTHREADFLAGS) and the link step (LDFLAGS).
ifeq ($(THREADING_MODEL),omp)
CTHREADFLAGS := -openmp
LDFLAGS += -openmp
endif
# POSIX threads: compile with -pthread and link against libpthread.
ifeq ($(THREADING_MODEL),pthreads)
CTHREADFLAGS := -pthread
LDFLAGS += -lpthread
endif
endif
@@ -184,10 +184,11 @@ ifeq ($(THREADING_MODEL),auto)
THREADING_MODEL := pthreads
endif
# OpenMP: pass -fopenmp to both the compile step (CTHREADFLAGS) and the link
# step (LDFLAGS). No $(error ...) guard is placed here, because it would stop
# make before these assignments could take effect.
# NOTE(review): this relies on the installed Clang supporting -fopenmp --
# confirm for the toolchains you target.
ifeq ($(THREADING_MODEL),omp)
CTHREADFLAGS := -fopenmp
LDFLAGS += -fopenmp
endif
# POSIX threads: compile with -pthread and link against libpthread.
ifeq ($(THREADING_MODEL),pthreads)
CTHREADFLAGS := -pthread
LDFLAGS += -lpthread
endif
endif
@@ -207,10 +208,12 @@ CFLAGS_KERNELS := $(CKOPTFLAGS) $(CVECFLAGS) $(CFLAGS_NOOPT)
# Shorthand verbosity switch. When V is exactly 1, both verbose make output
# and test output are turned on; when V is exactly 0, both are turned off.
# Any other value (including an unset V) leaves the two settings as they are.
ifeq "$(V)" "1"
BLIS_ENABLE_TEST_OUTPUT := yes
BLIS_ENABLE_VERBOSE_MAKE_OUTPUT := yes
endif
ifeq "$(V)" "0"
BLIS_ENABLE_TEST_OUTPUT := no
BLIS_ENABLE_VERBOSE_MAKE_OUTPUT := no
endif

View File

@@ -1,43 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H
#define BLIS_INT_TYPE_SIZE 32
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
#endif

View File

@@ -1,44 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H
#define BLIS_INT_TYPE_SIZE 64
#define BLIS_SIMD_ALIGN_SIZE 16
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
#endif

View File

@@ -38,6 +38,8 @@
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
#define BLIS_SIMD_ALIGN_SIZE 16
// -- Cache blocksizes --
//

View File

@@ -1,45 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H
#undef restrict
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
#endif

View File

@@ -36,6 +36,9 @@
#define BLIS_KERNEL_H
#undef restrict
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
// -- Cache blocksizes --

View File

@@ -1,43 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H
#define BLIS_SIMD_ALIGN_SIZE 16
#endif

View File

@@ -38,6 +38,8 @@
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
#define BLIS_SIMD_ALIGN_SIZE 16
// -- Cache blocksizes --
//

View File

@@ -38,6 +38,8 @@
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
#define BLIS_SIMD_ALIGN_SIZE 16
// -- Cache blocksizes --
//

View File

@@ -1,44 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H
#define BLIS_INT_TYPE_SIZE 32
#define BLIS_SIMD_ALIGN_SIZE 16
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
#endif

View File

@@ -38,6 +38,8 @@
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
#define BLIS_SIMD_ALIGN_SIZE 16
// -- Cache blocksizes --
//

View File

@@ -1,44 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H
#define BLIS_INT_TYPE_SIZE 32
#define BLIS_SIMD_ALIGN_SIZE 16
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
#endif

View File

@@ -38,6 +38,8 @@
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
#define BLIS_SIMD_ALIGN_SIZE 16
// -- Cache blocksizes --
//

View File

@@ -1,42 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H
#define BLIS_SIMD_ALIGN_SIZE 16
#endif

View File

@@ -38,6 +38,9 @@
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
#undef BLIS_SIMD_ALIGN_SIZE
#define BLIS_SIMD_ALIGN_SIZE 16
// -- Cache blocksizes --
//

View File

@@ -1,44 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H
#define BLIS_INT_TYPE_SIZE 32
#define BLIS_SIMD_ALIGN_SIZE 16
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
#endif

View File

@@ -39,6 +39,8 @@
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
#define BLIS_SIMD_ALIGN_SIZE 16
// -- Cache blocksizes --
//

View File

@@ -1,41 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H
#endif

View File

@@ -60,12 +60,23 @@
#else
/*
#define BLIS_SGEMM_UKERNEL bli_sgemm_asm_16x6
#define BLIS_DEFAULT_MC_S 144
#define BLIS_DEFAULT_KC_S 256
#define BLIS_DEFAULT_NC_S 4080
#define BLIS_DEFAULT_MR_S 16
#define BLIS_DEFAULT_NR_S 6
*/
#define BLIS_SGEMM_UKERNEL bli_sgemm_asm_6x16
#define BLIS_DEFAULT_MC_S 144
#define BLIS_DEFAULT_KC_S 256
#define BLIS_DEFAULT_NC_S 4080
#define BLIS_DEFAULT_MR_S 6
#define BLIS_DEFAULT_NR_S 16
#define BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS
#endif
@@ -80,12 +91,24 @@
#else
/*
#define BLIS_DGEMM_UKERNEL bli_dgemm_asm_8x6
#define BLIS_DEFAULT_MC_D 72
#define BLIS_DEFAULT_KC_D 256
#define BLIS_DEFAULT_NC_D 4080
#define BLIS_DEFAULT_MR_D 8
#define BLIS_DEFAULT_NR_D 6
*/
#define BLIS_DGEMM_UKERNEL bli_dgemm_asm_6x8
#define BLIS_DEFAULT_MC_D 72
#define BLIS_DEFAULT_KC_D 256
#define BLIS_DEFAULT_NC_D 4080
#define BLIS_DEFAULT_MR_D 6
#define BLIS_DEFAULT_NR_D 8
#define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS
#endif

View File

@@ -1,44 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H
#define BLIS_INT_TYPE_SIZE 32
#define BLIS_SIMD_ALIGN_SIZE 16
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
#endif

View File

@@ -38,6 +38,8 @@
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
#define BLIS_SIMD_ALIGN_SIZE 16
// -- Cache blocksizes --
//

View File

@@ -36,6 +36,16 @@
#define BLIS_KERNEL_H
#define BLIS_TREE_BARRIER
#define BLIS_TREE_BARRIER_ARITY 4
#define BLIS_SIMD_ALIGN_SIZE 32
#define BLIS_SIMD_SIZE 64
#define BLIS_SIMD_NUM_REGISTERS 32
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
// -- Cache blocksizes --

View File

@@ -1,43 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H
#define BLIS_SIMD_ALIGN_SIZE 16
#endif

View File

@@ -38,6 +38,8 @@
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
#define BLIS_SIMD_ALIGN_SIZE 16
// -- Cache blocksizes --
//

View File

@@ -1,44 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H
#define BLIS_INT_TYPE_SIZE 32
#define BLIS_SIMD_ALIGN_SIZE 16
#define BLIS_BLAS2BLIS_INT_TYPE_SIZE 32
#endif

View File

@@ -44,6 +44,8 @@
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
#define BLIS_SIMD_ALIGN_SIZE 16
// -- Cache blocksizes --
//

View File

@@ -1,42 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H
#endif

View File

@@ -1,41 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H
#endif

121
configure vendored
View File

@@ -91,14 +91,34 @@ print_usage()
echo " -t MODEL, --enable-threading[=MODEL], --disable-threading"
echo " "
echo " Enable threading in the library, using threading model"
echo " MODEL={auto,omp,pthreads,no}. If MODEL=no or "
echo " MODEL={omp,pthreads,no}. If MODEL=no or "
echo " --disable-threading is specified, threading will be"
echo " disabled. If MODEL=auto or is unspecified, a model"
echo " will be chosen automatically. The default is 'auto'."
echo " disabled. The default is 'no'."
echo " "
echo " -q, --quiet Suppress informational output. By default, configure"
echo " is verbose. (NOTE: -q is not yet implemented)"
echo " "
echo " -i SIZE, --int-size=SIZE"
echo " "
echo " Set the size (in bits) of internal BLIS integers and"
echo " integer types used in native BLIS interfaces."
echo " "
echo " -b SIZE, --blas-int-size=SIZE"
echo " "
echo " Set the size (in bits) of integer types in external"
echo " BLAS and CBLAS interfaces, if enabled."
echo " "
echo " --disable-blas, --enable-blas"
echo " "
echo " Disable (enabled by default) building the BLAS"
echo " compatibility layer."
echo " "
echo " --enable-cblas, --disable-cblas"
echo " "
echo " Enable (disabled by default) building the CBLAS"
echo " compatibility layer. This automatically enables the"
echo " BLAS compatibility layer as well."
echo " "
echo " -h, --help Output this information and quit."
echo " "
echo " Environment Variables:"
@@ -149,6 +169,13 @@ main()
config_mk_in_path="${build_dirpath}/${config_mk_in}"
config_mk_out_path="${cur_dirpath}/${config_mk_out}"
# The names/paths for the template bli_config.h.in and its instantiated
# counterpart.
bli_config_h_in='bli_config.h.in'
bli_config_h_out='bli_config.h'
bli_config_h_in_path="${build_dirpath}/${bli_config_h_in}"
bli_config_h_out_path="${cur_dirpath}/${bli_config_h_out}"
# Path to 'update-version-file.sh' script.
update_version_file_sh="${build_dirpath}/update-version-file.sh"
@@ -188,7 +215,7 @@ main()
debug_flag=''
# The threading flag.
threading_model='auto'
threading_model='no'
# Option variables.
quiet_flag=''
@@ -197,6 +224,10 @@ main()
enable_verbose='no'
enable_static='yes'
enable_shared='no'
int_type_size=0
blas2blis_int_type_size=32
enable_blas2blis='yes'
enable_cblas='no'
# The path to the auto-detection script.
auto_detect_sh="${build_dirpath}/auto-detect/auto-detect.sh"
@@ -221,7 +252,7 @@ main()
# Process our command line options.
while getopts ":hp:d:t:q-:" opt; do
while getopts ":hp:d:t:qi:b:-:" opt; do
case $opt in
-)
case "$OPTARG" in
@@ -264,15 +295,30 @@ main()
disable-shared)
enable_shared='no'
;;
enable-threading)
threading_model='auto'
;;
enable-threading=*)
threading_model=${OPTARG#*=}
;;
disable-threading)
threading_model='no'
;;
int-size=*)
int_type_size=${OPTARG#*=}
;;
blas-int-size=*)
blas2blis_int_type_size=${OPTARG#*=}
;;
enable-blas)
enable_blas2blis='yes'
;;
disable-blas)
enable_blas2blis='no'
;;
enable-cblas)
enable_cblas='yes'
;;
disable-cblas)
enable_cblas='no'
;;
*)
print_usage
;;
@@ -294,6 +340,12 @@ main()
t)
threading_model=$OPTARG
;;
i)
int_type_size=$OPTARG
;;
b)
blas2blis_int_type_size=$OPTARG
;;
\?)
print_usage
;;
@@ -430,18 +482,58 @@ main()
# Check the threading model flag.
enable_openmp=0
enable_pthreads=0
if [ "x${threading_model}" = "xauto" ]; then
echo "${script_name}: determining the threading model automatically."
elif [ "x${threading_model}" = "xomp" ]; then
echo "${script_name}: using OpenMP for threading."
enable_openmp=1
elif [ "x${threading_model}" = "xpthreads" ]; then
echo "${script_name}: using Pthreads for threading."
enable_pthreads=1
elif [ "x${threading_model}" = "xno" ]; then
echo "${script_name}: threading is disabled."
else
echo "Unsupported threading model: ${threading_model}."
exit 1
fi
# Convert 'yes' and 'no' flags to booleans. The CBLAS flag is converted
# first because enabling CBLAS must force the BLAS layer on *before* the
# BLAS flag itself is converted to 0/1 further down.
if [ "x${enable_cblas}" = "xyes" ]; then
	echo "${script_name}: the CBLAS compatibility layer is enabled."
	enable_cblas=1
	# Force the BLAS layer when CBLAS is enabled. The option variable is
	# 'enable_blas2blis' (it is what --enable-blas/--disable-blas set and
	# what is tested below); assigning to any other name would leave a
	# prior --disable-blas in effect.
	enable_blas2blis='yes'
else
	echo "${script_name}: the CBLAS compatibility layer is disabled."
	enable_cblas=0
fi
if [ "x${enable_blas2blis}" = "xyes" ]; then
	echo "${script_name}: the BLAS compatibility layer is enabled."
	enable_blas2blis=1
else
	echo "${script_name}: the BLAS compatibility layer is disabled."
	enable_blas2blis=0
fi
# Tell the user which integer widths were requested. Only the literal
# values 32 and 64 are reported as fixed sizes; every other value falls
# through to the "automatically determined" message.
case "${int_type_size}" in
	32)
		echo "${script_name}: the internal integer size is 32-bit."
		;;
	64)
		echo "${script_name}: the internal integer size is 64-bit."
		;;
	*)
		echo "${script_name}: the internal integer size is automatically determined."
		;;
esac
case "${blas2blis_int_type_size}" in
	32)
		echo "${script_name}: the BLAS/CBLAS interface integer size is 32-bit."
		;;
	64)
		echo "${script_name}: the BLAS/CBLAS interface integer size is 64-bit."
		;;
	*)
		echo "${script_name}: the BLAS/CBLAS interface integer size is automatically determined."
		;;
esac
# Insert escape characters into the paths used in the sed command below.
@@ -466,6 +558,19 @@ main()
| sed "s/@enable_dynamic@/${enable_shared}/g" \
| sed "s/@threading_model@/${threading_model}/g" \
> "${config_mk_out_path}"
# Instantiate bli_config.h from its template: every @placeholder@ in the
# bli_config_h_in file is replaced by the value chosen above and the
# result is written to bli_config_h_out. The expressions are applied in
# the order listed.
echo "${script_name}: creating ${bli_config_h_out_path} from ${bli_config_h_in_path}"
sed -e "s/@enable_openmp@/${enable_openmp}/g" \
    -e "s/@enable_pthreads@/${enable_pthreads}/g" \
    -e "s/@int_type_size@/${int_type_size}/g" \
    -e "s/@blas2blis_int_type_size@/${blas2blis_int_type_size}/g" \
    -e "s/@enable_blas2blis@/${enable_blas2blis}/g" \
    -e "s/@enable_cblas@/${enable_cblas}/g" \
    "${bli_config_h_in_path}" \
    > "${bli_config_h_out_path}"
# Create obj sub-directories (if they do not already exist).

View File

@@ -56,6 +56,23 @@ GENFRONT( subv )
GENFRONT( swapv )
#undef GENFRONT
// Generate the _check function for operations that take two scalars
// (alpha, beta) and two vectors (x, y). The generated function forwards
// its four operands, unchanged, to bli_l1v_axby_check(). It is
// instantiated below for axpbyv.
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,_check) \
( \
obj_t* alpha, \
obj_t* x, \
obj_t* beta, \
obj_t* y \
) \
{ \
bli_l1v_axby_check( alpha, x, beta, y ); \
}
GENFRONT( axpbyv )
#undef GENFRONT
#define GENFRONT( opname ) \
\
@@ -137,6 +154,22 @@ GENFRONT( scalv )
GENFRONT( setv )
#undef GENFRONT
// Generate the _check function for operations that take one scalar
// (beta) and two vectors (x, y). The generated function forwards its
// three operands, unchanged, to bli_l1v_xby_check(). It is instantiated
// below for xpbyv.
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,_check) \
( \
obj_t* x, \
obj_t* beta, \
obj_t* y \
) \
{ \
bli_l1v_xby_check( x, beta, y ); \
}
GENFRONT( xpbyv )
// -----------------------------------------------------------------------------
void bli_l1v_xy_check
@@ -221,6 +254,108 @@ void bli_l1v_axy_check
bli_check_error_code( e_val );
}
// Validate the operands of an "x plus beta-times-y" style level-1v
// operation. Each check's result is handed to bli_check_error_code()
// immediately, in the order: datatypes, dimensions, buffers.
void bli_l1v_xby_check
     (
       obj_t*  x,
       obj_t*  beta,
       obj_t*  y
     )
{
	err_t status;

	// Datatypes: noninteger check on the scalar, floating check on
	// both vectors.

	status = bli_check_noninteger_object( beta );
	bli_check_error_code( status );

	status = bli_check_floating_object( x );
	bli_check_error_code( status );

	status = bli_check_floating_object( y );
	bli_check_error_code( status );

	// Dimensions: beta must be a scalar object, x and y vector objects
	// of equal length.

	status = bli_check_scalar_object( beta );
	bli_check_error_code( status );

	status = bli_check_vector_object( x );
	bli_check_error_code( status );

	status = bli_check_vector_object( y );
	bli_check_error_code( status );

	status = bli_check_equal_vector_lengths( x, y );
	bli_check_error_code( status );

	// Buffers: none of the three objects may carry a NULL buffer.

	status = bli_check_object_buffer( beta );
	bli_check_error_code( status );

	status = bli_check_object_buffer( x );
	bli_check_error_code( status );

	status = bli_check_object_buffer( y );
	bli_check_error_code( status );
}
// Validate the operands of an "alpha-times-x plus beta-times-y" style
// level-1v operation. Each check's result is handed to
// bli_check_error_code() immediately, in the order: datatypes,
// dimensions, buffers.
void bli_l1v_axby_check
     (
       obj_t*  alpha,
       obj_t*  x,
       obj_t*  beta,
       obj_t*  y
     )
{
	err_t status;

	// Datatypes: noninteger check on both scalars, floating check on
	// both vectors.

	status = bli_check_noninteger_object( alpha );
	bli_check_error_code( status );

	status = bli_check_noninteger_object( beta );
	bli_check_error_code( status );

	status = bli_check_floating_object( x );
	bli_check_error_code( status );

	status = bli_check_floating_object( y );
	bli_check_error_code( status );

	// Dimensions: alpha and beta must be scalar objects, x and y vector
	// objects of equal length.

	status = bli_check_scalar_object( alpha );
	bli_check_error_code( status );

	status = bli_check_scalar_object( beta );
	bli_check_error_code( status );

	status = bli_check_vector_object( x );
	bli_check_error_code( status );

	status = bli_check_vector_object( y );
	bli_check_error_code( status );

	status = bli_check_equal_vector_lengths( x, y );
	bli_check_error_code( status );

	// Buffers: none of the four objects may carry a NULL buffer.

	status = bli_check_object_buffer( alpha );
	bli_check_error_code( status );

	status = bli_check_object_buffer( beta );
	bli_check_error_code( status );

	status = bli_check_object_buffer( x );
	bli_check_error_code( status );

	status = bli_check_object_buffer( y );
	bli_check_error_code( status );
}
void bli_l1v_dot_check
(
obj_t* alpha,

View File

@@ -52,6 +52,20 @@ GENTPROT( subv )
GENTPROT( swapv )
#undef GENTPROT
#define GENTPROT( opname ) \
\
void PASTEMAC(opname,_check) \
( \
obj_t* alpha, \
obj_t* x, \
obj_t* beta, \
obj_t* y \
);
GENTPROT( axpbyv )
#undef GENTPROT
#define GENTPROT( opname ) \
\
@@ -118,6 +132,20 @@ GENTPROT( scalv )
GENTPROT( setv )
#undef GENTPROT
#define GENTPROT( opname ) \
\
void PASTEMAC(opname,_check) \
( \
obj_t* x, \
obj_t* beta, \
obj_t* y \
);
GENTPROT( xpbyv )
// -----------------------------------------------------------------------------
void bli_l1v_xy_check
@@ -133,6 +161,21 @@ void bli_l1v_axy_check
obj_t* y
);
void bli_l1v_xby_check
(
obj_t* x,
obj_t* beta,
obj_t* y
);
void bli_l1v_axby_check
(
obj_t* alpha,
obj_t* x,
obj_t* beta,
obj_t* y
);
void bli_l1v_dot_check
(
obj_t* alpha,

View File

@@ -64,6 +64,31 @@ GENFRONT( subv, BLIS_SUBV_KER )
GENFRONT( swapv, BLIS_SWAPV_KER )
#undef GENFRONT
// Generate the _cntx_init and _cntx_finalize functions for a level-1v
// operation whose kernel relies on four other level-1v operations.
// _cntx_init creates the context object, runs the _cntx_init of each
// dependency (dep1..dep4) on that same context, and then registers the
// operation's own kernel (kertype) through bli_gks_cntx_set_l1v_ker().
// _cntx_finalize frees the context object. It is instantiated below for
// axpbyv.
#define GENFRONT( opname, kertype, dep1, dep2, dep3, dep4 ) \
\
void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \
{ \
bli_cntx_obj_create( cntx ); \
\
/* Initialize the context with kernel dependencies. */ \
PASTEMAC(dep1,_cntx_init)( cntx ); \
PASTEMAC(dep2,_cntx_init)( cntx ); \
PASTEMAC(dep3,_cntx_init)( cntx ); \
PASTEMAC(dep4,_cntx_init)( cntx ); \
\
/* Initialize the context with the kernel associated with the current
operation. */ \
bli_gks_cntx_set_l1v_ker( kertype, cntx ); \
} \
void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \
{ \
bli_cntx_obj_free( cntx ); \
}
GENFRONT( axpbyv, BLIS_AXPBYV_KER, axpyv, xpbyv, scal2v, scalv )
#undef GENFRONT
#define GENFRONT( opname, kertype, depname ) \
\
@@ -84,6 +109,29 @@ void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \
}
GENFRONT( axpyv, BLIS_AXPYV_KER, addv )
GENFRONT( scal2v, BLIS_SCAL2V_KER, setv )
GENFRONT( scalv, BLIS_SCALV_KER, setv )
#undef GENFRONT
// Generate the _cntx_init and _cntx_finalize functions for a level-1v
// operation whose kernel relies on two other level-1v operations.
// _cntx_init creates the context object, runs the _cntx_init of dep1
// and dep2 on that same context, and then registers the operation's own
// kernel (kertype) through bli_gks_cntx_set_l1v_ker(). _cntx_finalize
// frees the context object. It is instantiated below for scal2v and
// xpbyv.
#define GENFRONT( opname, kertype, dep1, dep2 ) \
\
void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \
{ \
bli_cntx_obj_create( cntx ); \
\
/* Initialize the context with kernel dependencies. */ \
PASTEMAC(dep1,_cntx_init)( cntx ); \
PASTEMAC(dep2,_cntx_init)( cntx ); \
\
/* Initialize the context with the kernel associated with the current
operation. */ \
bli_gks_cntx_set_l1v_ker( kertype, cntx ); \
} \
void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \
{ \
bli_cntx_obj_free( cntx ); \
}
GENFRONT( scal2v, BLIS_SCAL2V_KER, setv, copyv )
GENFRONT( xpbyv, BLIS_XPBYV_KER, addv, copyv )

View File

@@ -44,6 +44,7 @@ void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ); \
void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx );
GENPROT( addv )
GENPROT( axpbyv )
GENPROT( axpyv )
GENPROT( copyv )
GENPROT( dotv )
@@ -54,4 +55,5 @@ GENPROT( scal2v )
GENPROT( setv )
GENPROT( subv )
GENPROT( swapv )
GENPROT( xpbyv )

View File

@@ -49,8 +49,8 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
( \
conj_t conjx, \
dim_t n, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
);
@@ -58,6 +58,24 @@ INSERT_GENTDEF( addv )
INSERT_GENTDEF( copyv )
INSERT_GENTDEF( subv )
// axpbyv
#undef GENTDEF
#define GENTDEF( ctype, ch, opname, tsuf ) \
\
typedef void (*PASTECH2(ch,opname,tsuf)) \
( \
conj_t conjx, \
dim_t n, \
ctype* restrict alpha, \
ctype* restrict x, inc_t incx, \
ctype* restrict beta, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
);
INSERT_GENTDEF( axpbyv )
// axpyv, scal2v
#undef GENTDEF
@@ -67,9 +85,9 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
( \
conj_t conjx, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* restrict alpha, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
);
@@ -86,9 +104,9 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* rho, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
ctype* restrict rho, \
cntx_t* cntx \
);
@@ -104,11 +122,11 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* beta, \
ctype* rho, \
ctype* restrict alpha, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
ctype* restrict beta, \
ctype* restrict rho, \
cntx_t* cntx \
);
@@ -122,7 +140,7 @@ INSERT_GENTDEF( dotxv )
typedef void (*PASTECH2(ch,opname,tsuf)) \
( \
dim_t n, \
ctype* x, inc_t incx, \
ctype* restrict x, inc_t incx, \
cntx_t* cntx \
);
@@ -137,8 +155,8 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
( \
conj_t conjalpha, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t incx, \
ctype* restrict alpha, \
ctype* restrict x, inc_t incx, \
cntx_t* cntx \
);
@@ -153,14 +171,29 @@ INSERT_GENTDEF( setv )
typedef void (*PASTECH2(ch,opname,tsuf)) \
( \
dim_t n, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
);
INSERT_GENTDEF( swapv )
// xpbyv
#undef GENTDEF
#define GENTDEF( ctype, ch, opname, tsuf ) \
\
typedef void (*PASTECH2(ch,opname,tsuf)) \
( \
conj_t conjx, \
dim_t n, \
ctype* restrict x, inc_t incx, \
ctype* restrict beta, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
);
INSERT_GENTDEF( xpbyv )
#endif

View File

@@ -42,11 +42,11 @@
\
void PASTEMAC(ch,opname) \
( \
conj_t conjx, \
dim_t n, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
cntx_t* cntx \
conj_t conjx, \
dim_t n, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
);
INSERT_GENTPROT_BASIC( addv_ker_name )
@@ -59,12 +59,29 @@ INSERT_GENTPROT_BASIC( subv_ker_name )
\
void PASTEMAC(ch,opname) \
( \
conj_t conjx, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
cntx_t* cntx \
conj_t conjx, \
dim_t n, \
ctype* restrict alpha, \
ctype* restrict x, inc_t incx, \
ctype* restrict beta, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
); \
INSERT_GENTPROT_BASIC( axpbyv_ker_name )
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
( \
conj_t conjx, \
dim_t n, \
ctype* restrict alpha, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
); \
INSERT_GENTPROT_BASIC( axpyv_ker_name )
@@ -76,13 +93,13 @@ INSERT_GENTPROT_BASIC( scal2v_ker_name )
\
void PASTEMAC(ch,opname) \
( \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* rho, \
cntx_t* cntx \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
ctype* restrict rho, \
cntx_t* cntx \
); \
INSERT_GENTPROT_BASIC( dotv_ker_name )
@@ -93,15 +110,15 @@ INSERT_GENTPROT_BASIC( dotv_ker_name )
\
void PASTEMAC(ch,opname) \
( \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* beta, \
ctype* rho, \
cntx_t* cntx \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* restrict alpha, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
ctype* restrict beta, \
ctype* restrict rho, \
cntx_t* cntx \
); \
INSERT_GENTPROT_BASIC( dotxv_ker_name )
@@ -112,9 +129,9 @@ INSERT_GENTPROT_BASIC( dotxv_ker_name )
\
void PASTEMAC(ch,opname) \
( \
dim_t n, \
ctype* x, inc_t incx, \
cntx_t* cntx \
dim_t n, \
ctype* restrict x, inc_t incx, \
cntx_t* cntx \
); \
INSERT_GENTPROT_BASIC( invertv_ker_name )
@@ -125,11 +142,11 @@ INSERT_GENTPROT_BASIC( invertv_ker_name )
\
void PASTEMAC(ch,opname) \
( \
conj_t conjalpha, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t incx, \
cntx_t* cntx \
conj_t conjalpha, \
dim_t n, \
ctype* restrict alpha, \
ctype* restrict x, inc_t incx, \
cntx_t* cntx \
); \
INSERT_GENTPROT_BASIC( scalv_ker_name )
@@ -141,11 +158,27 @@ INSERT_GENTPROT_BASIC( setv_ker_name )
\
void PASTEMAC(ch,opname) \
( \
dim_t n, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
cntx_t* cntx \
dim_t n, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
); \
INSERT_GENTPROT_BASIC( swapv_ker_name )
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname) \
( \
conj_t conjx, \
dim_t n, \
ctype* restrict x, inc_t incx, \
ctype* restrict beta, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
); \
INSERT_GENTPROT_BASIC( xpbyv_ker_name )

View File

@@ -82,6 +82,64 @@ GENFRONT( copyv )
GENFRONT( subv )
#undef GENFRONT
// Generate the object-based front-end for an operation taking two
// scalars (alpha, beta) and two vectors (x, y). The generated function:
//   1. reads the datatype, conjugation status, vector length, buffer and
//      increment from x, and the buffer and increment from y;
//   2. runs the operation's _check function when error checking is
//      enabled;
//   3. makes local copies of alpha and beta, cast to the datatype of x;
//   4. dispatches on that datatype to the void pointer-based function
//      via bli_call_ft_9().
// It is instantiated below for axpbyv.
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
( \
obj_t* alpha, \
obj_t* x, \
obj_t* beta, \
obj_t* y \
BLIS_OAPI_CNTX_PARAM \
) \
{ \
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \
\
void* buf_alpha; \
void* buf_beta; \
\
obj_t alpha_local; \
obj_t beta_local; \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( alpha, x, beta, y ); \
\
/* Create local copy-casts of scalars (and apply internal conjugation
as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
beta, &beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
\
/* Invoke the void pointer-based function. */ \
bli_call_ft_9 \
( \
dt, \
opname, \
conjx, \
n, \
buf_alpha, \
buf_x, inc_x, \
buf_beta, \
buf_y, inc_y, \
cntx \
); \
}
GENFRONT( axpbyv )
#undef GENFRONT
#define GENFRONT( opname ) \
@@ -366,5 +424,57 @@ void PASTEMAC(opname,EX_SUF) \
GENFRONT( swapv )
#undef GENFRONT
// Generate the object-based front-end for an operation taking one
// scalar (beta) and two vectors (x, y). The generated function:
//   1. reads the datatype, conjugation status, vector length, buffer and
//      increment from x, and the buffer and increment from y;
//   2. runs the operation's _check function when error checking is
//      enabled;
//   3. makes a local copy of beta, cast to the datatype of x;
//   4. dispatches on that datatype to the void pointer-based function
//      via bli_call_ft_8().
// It is instantiated below for xpbyv.
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
( \
obj_t* x, \
obj_t* beta, \
obj_t* y \
BLIS_OAPI_CNTX_PARAM \
) \
{ \
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \
\
void* buf_beta; \
\
obj_t beta_local; \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x, beta, y ); \
\
/* Create local copy-casts of scalars (and apply internal conjugation
as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
beta, &beta_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
\
/* Invoke the void pointer-based function. */ \
bli_call_ft_8 \
( \
dt, \
opname, \
conjx, \
n, \
buf_x, inc_x, \
buf_beta, \
buf_y, inc_y, \
cntx \
); \
}
GENFRONT( xpbyv )
#endif

View File

@@ -52,6 +52,21 @@ GENTPROT( copyv )
GENTPROT( subv )
#undef GENTPROT
#define GENTPROT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
( \
obj_t* alpha, \
obj_t* x, \
obj_t* beta, \
obj_t* y \
BLIS_OAPI_CNTX_PARAM \
);
GENTPROT( axpbyv )
#undef GENTPROT
#define GENTPROT( opname ) \
\
@@ -135,3 +150,17 @@ void PASTEMAC(opname,EX_SUF) \
GENTPROT( swapv )
#undef GENTPROT
#define GENTPROT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
( \
obj_t* x, \
obj_t* beta, \
obj_t* y \
BLIS_OAPI_CNTX_PARAM \
);
GENTPROT( xpbyv )

View File

@@ -74,6 +74,44 @@ INSERT_GENTFUNC_BASIC( copyv, BLIS_COPYV_KER )
INSERT_GENTFUNC_BASIC( subv, BLIS_SUBV_KER )
#undef GENTFUNC
// Generate the typed front-end for an operation taking two scalars
// (alpha, beta) and two vectors (x, y). The generated function obtains
// the context to use in cntx_p through bli_cntx_init_local_if(), queries
// that context for the level-1v kernel identified by kerid for this
// datatype, calls the kernel with the caller's arguments, and finally
// calls bli_cntx_finalize_local_if().
// NOTE(review): bli_cntx_init_local_if / bli_cntx_finalize_local_if
// presumably set up and tear down a local context only when the caller
// passes none -- confirm against their definitions.
// It is instantiated below for axpbyv.
#define GENTFUNC( ctype, ch, opname, kerid ) \
\
void PASTEMAC(ch,opname) \
( \
conj_t conjx, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t incx, \
ctype* beta, \
ctype* y, inc_t incy, \
cntx_t* cntx \
) \
{ \
const num_t dt = PASTEMAC(ch,type); \
cntx_t* cntx_p; \
\
bli_cntx_init_local_if( opname, cntx, cntx_p ); \
\
PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \
\
f \
( \
conjx, \
n, \
alpha, \
x, incx, \
beta, \
y, incy, \
cntx_p \
); \
\
bli_cntx_finalize_local_if( opname, cntx ); \
}
INSERT_GENTFUNC_BASIC( axpbyv, BLIS_AXPBYV_KER )
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, kerid ) \
\
@@ -287,3 +325,39 @@ void PASTEMAC(ch,opname) \
INSERT_GENTFUNC_BASIC( swapv, BLIS_SWAPV_KER )
#undef GENTFUNC
// Generate the typed front-end for an operation taking one scalar
// (beta) and two vectors (x, y). The generated function obtains the
// context to use in cntx_p through bli_cntx_init_local_if(), queries
// that context for the level-1v kernel identified by kerid for this
// datatype, calls the kernel with the caller's arguments, and finally
// calls bli_cntx_finalize_local_if().
// NOTE(review): bli_cntx_init_local_if / bli_cntx_finalize_local_if
// presumably set up and tear down a local context only when the caller
// passes none -- confirm against their definitions.
// It is instantiated below for xpbyv.
#define GENTFUNC( ctype, ch, opname, kerid ) \
\
void PASTEMAC(ch,opname) \
( \
conj_t conjx, \
dim_t n, \
ctype* x, inc_t incx, \
ctype* beta, \
ctype* y, inc_t incy, \
cntx_t* cntx \
) \
{ \
const num_t dt = PASTEMAC(ch,type); \
cntx_t* cntx_p; \
\
bli_cntx_init_local_if( opname, cntx, cntx_p ); \
\
PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \
\
f \
( \
conjx, \
n, \
x, incx, \
beta, \
y, incy, \
cntx_p \
); \
\
bli_cntx_finalize_local_if( opname, cntx ); \
}
INSERT_GENTFUNC_BASIC( xpbyv, BLIS_XPBYV_KER )

View File

@@ -40,6 +40,9 @@
#undef addv_ker_name
#define addv_ker_name addv
#undef axpbyv_ker_name
#define axpbyv_ker_name axpbyv
#undef axpyv_ker_name
#define axpyv_ker_name axpyv
@@ -70,6 +73,9 @@
#undef swapv_ker_name
#define swapv_ker_name swapv
#undef xpbyv_ker_name
#define xpbyv_ker_name xpbyv
// Include the level-1v kernel API template.

View File

@@ -39,15 +39,15 @@
\
void PASTEMAC(ch,varname) \
( \
conj_t conjx, \
dim_t n, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
cntx_t* cntx \
conj_t conjx, \
dim_t n, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
) \
{ \
ctype* chi1; \
ctype* psi1; \
ctype* restrict chi1; \
ctype* restrict psi1; \
dim_t i; \
\
if ( bli_zero_dim1( n ) ) return; \
@@ -57,22 +57,42 @@ void PASTEMAC(ch,varname) \
\
if ( bli_is_conj( conjx ) ) \
{ \
for ( i = 0; i < n; ++i ) \
if (incx == 1 && incy == 1) \
{ \
PASTEMAC(ch,addjs)( *chi1, *psi1 ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,addjs)( chi1[i], psi1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,addjs)( *chi1, *psi1 ); \
\
chi1 += incx; \
psi1 += incy; \
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
if (incx == 1 && incy == 1) \
{ \
PASTEMAC(ch,adds)( *chi1, *psi1 ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,adds)( chi1[i], psi1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,adds)( *chi1, *psi1 ); \
\
chi1 += incx; \
psi1 += incy; \
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
}

View File

@@ -0,0 +1,244 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
#undef GENTFUNC
// Reference axpbyv kernel template.
//
// The generated function returns immediately when bli_zero_dim1( n )
// holds. Otherwise it inspects alpha and beta and, whenever either is
// zero or one, hands the work to a simpler level-1v kernel queried from
// the context (setv, scalv, copyv, addv, xpbyv, scal2v or axpyv) and
// returns. Only when neither scalar is zero or one does it fall through
// to the element-wise loops at the bottom, which are split by the
// conjugation of x and by whether both increments are one.
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
conj_t conjx, \
dim_t n, \
ctype* restrict alpha, \
ctype* restrict x, inc_t incx, \
ctype* restrict beta, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
) \
{ \
ctype* restrict chi1; \
ctype* restrict psi1; \
dim_t i; \
\
if ( bli_zero_dim1( n ) ) return; \
\
if ( PASTEMAC(ch,eq0)( *alpha ) ) \
{ \
/* If alpha is zero and beta is zero, set to zero. */ \
if ( PASTEMAC(ch,eq0)( *beta ) ) \
{ \
ctype* zero = PASTEMAC(ch,0); \
\
/* Query the context for the kernel function pointer. */ \
const num_t dt = PASTEMAC(ch,type); \
PASTECH(ch,setv_ft) setv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SETV_KER, cntx ); \
\
setv_p \
( \
BLIS_NO_CONJUGATE, \
n, \
zero, \
y, incy, \
cntx \
); \
return; \
} \
/* If alpha is zero and beta is one, return. */ \
else if ( PASTEMAC(ch,eq1)( *beta ) ) \
{ \
return; \
} \
/* If alpha is zero, scale by beta. */ \
else \
{ \
/* Query the context for the kernel function pointer. */ \
const num_t dt = PASTEMAC(ch,type); \
PASTECH(ch,scalv_ft) scalv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCALV_KER, cntx ); \
\
scalv_p \
( \
BLIS_NO_CONJUGATE, \
n, \
beta, \
y, incy, \
cntx \
); \
return; \
} \
\
} \
else if ( PASTEMAC(ch,eq1)( *alpha ) ) \
{ \
/* If alpha is one and beta is zero, copy. */ \
if ( PASTEMAC(ch,eq0)( *beta ) ) \
{ \
/* Query the context for the kernel function pointer. */ \
const num_t dt = PASTEMAC(ch,type); \
PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \
\
copyv_p \
( \
conjx, \
n, \
x, incx, \
y, incy, \
cntx \
); \
return; \
} \
/* If alpha is one and beta is one, add. */ \
else if ( PASTEMAC(ch,eq1)( *beta ) ) \
{ \
/* Query the context for the kernel function pointer. */ \
const num_t dt = PASTEMAC(ch,type); \
PASTECH(ch,addv_ft) addv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_ADDV_KER, cntx ); \
\
addv_p \
( \
conjx, \
n, \
x, incx, \
y, incy, \
cntx \
); \
return; \
} \
/* If alpha is one, call xpby. */ \
else \
{ \
/* Query the context for the kernel function pointer. */ \
const num_t dt = PASTEMAC(ch,type); \
PASTECH(ch,xpbyv_ft) xpbyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_XPBYV_KER, cntx ); \
\
xpbyv_p \
( \
conjx, \
n, \
x, incx, \
beta, \
y, incy, \
cntx \
); \
return; \
} \
} \
else \
{ \
/* If beta is zero, call scal2. */ \
if ( PASTEMAC(ch,eq0)( *beta ) ) \
{ \
/* Query the context for the kernel function pointer. */ \
const num_t dt = PASTEMAC(ch,type); \
PASTECH(ch,scal2v_ft) scal2v_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCAL2V_KER, cntx ); \
\
scal2v_p \
( \
conjx, \
n, \
alpha, \
x, incx, \
y, incy, \
cntx \
); \
return; \
} \
/* If beta is one, call axpy. */ \
else if ( PASTEMAC(ch,eq1)( *beta ) ) \
{ \
/* Query the context for the kernel function pointer. */ \
const num_t dt = PASTEMAC(ch,type); \
PASTECH(ch,axpyv_ft) axpyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \
\
axpyv_p \
( \
conjx, \
n, \
alpha, \
x, incx, \
y, incy, \
cntx \
); \
return; \
} \
\
} \
\
/* General case: neither alpha nor beta is zero or one, so every
element is updated through the axpbys/axpbyjs scalar macros
(presumably y := alpha * conjx(x) + beta * y -- confirm against
the macro definitions). */ \
chi1 = x; \
psi1 = y; \
\
if ( bli_is_conj( conjx ) ) \
{ \
/* Both increments are one: index the vectors directly. */ \
if ( incx == 1 && incy == 1 ) \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,axpbyjs)( *alpha, chi1[i], *beta, psi1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,axpbyjs)( *alpha, *chi1, *beta, *psi1 ); \
\
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
else \
{ \
/* Both increments are one: index the vectors directly. */ \
if ( incx == 1 && incy == 1 ) \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,axpbys)( *alpha, chi1[i], *beta, psi1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,axpbys)( *alpha, *chi1, *beta, *psi1 ); \
\
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
}
INSERT_GENTFUNC_BASIC0( axpbyv_ref )

View File

@@ -39,16 +39,16 @@
\
void PASTEMAC(ch,varname) \
( \
conj_t conjx, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
cntx_t* cntx \
conj_t conjx, \
dim_t n, \
ctype* restrict alpha, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
) \
{ \
ctype* chi1; \
ctype* psi1; \
ctype* restrict chi1; \
ctype* restrict psi1; \
dim_t i; \
\
if ( bli_zero_dim1( n ) ) return; \
@@ -79,22 +79,42 @@ void PASTEMAC(ch,varname) \
\
if ( bli_is_conj( conjx ) ) \
{ \
for ( i = 0; i < n; ++i ) \
if ( incx == 1 && incy == 1 ) \
{ \
PASTEMAC(ch,axpyjs)( *alpha, *chi1, *psi1 ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,axpyjs)( *alpha, chi1[i], psi1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,axpyjs)( *alpha, *chi1, *psi1 ); \
\
chi1 += incx; \
psi1 += incy; \
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
if ( incx == 1 && incy == 1 ) \
{ \
PASTEMAC(ch,axpys)( *alpha, *chi1, *psi1 ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,axpys)( *alpha, chi1[i], psi1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,axpys)( *alpha, *chi1, *psi1 ); \
\
chi1 += incx; \
psi1 += incy; \
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
}

View File

@@ -39,15 +39,15 @@
\
void PASTEMAC(ch,varname) \
( \
conj_t conjx, \
dim_t n, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
cntx_t* cntx \
conj_t conjx, \
dim_t n, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
) \
{ \
ctype* chi1; \
ctype* psi1; \
ctype* restrict chi1; \
ctype* restrict psi1; \
dim_t i; \
\
if ( bli_zero_dim1( n ) ) return; \
@@ -57,22 +57,42 @@ void PASTEMAC(ch,varname) \
\
if ( bli_is_conj( conjx ) ) \
{ \
for ( i = 0; i < n; ++i ) \
if ( incx == 1 && incy == 1 ) \
{ \
PASTEMAC(ch,copyjs)( *chi1, *psi1 ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,copyjs)( chi1[i], psi1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,copyjs)( *chi1, *psi1 ); \
\
chi1 += incx; \
psi1 += incy; \
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
if ( incx == 1 && incy == 1 ) \
{ \
PASTEMAC(ch,copys)( *chi1, *psi1 ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,copys)( chi1[i], psi1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,copys)( *chi1, *psi1 ); \
\
chi1 += incx; \
psi1 += incy; \
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
}

View File

@@ -39,17 +39,17 @@
\
void PASTEMAC(ch,varname) \
( \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* rho, \
cntx_t* cntx \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
ctype* restrict rho, \
cntx_t* cntx \
) \
{ \
ctype* chi1; \
ctype* psi1; \
ctype* restrict chi1; \
ctype* restrict psi1; \
ctype dotxy; \
dim_t i; \
conj_t conjx_use; \
@@ -75,22 +75,42 @@ void PASTEMAC(ch,varname) \
\
if ( bli_is_conj( conjx_use ) ) \
{ \
for ( i = 0; i < n; ++i ) \
if ( incx == 1 && incy == 1 ) \
{ \
PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,dotjs)( chi1[i], psi1[i], dotxy ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \
\
chi1 += incx; \
psi1 += incy; \
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
if ( incx == 1 && incy == 1 ) \
{ \
PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,dots)( chi1[i], psi1[i], dotxy ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \
\
chi1 += incx; \
psi1 += incy; \
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
\

View File

@@ -39,19 +39,19 @@
\
void PASTEMAC(ch,varname) \
( \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* beta, \
ctype* rho, \
cntx_t* cntx \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* restrict alpha, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
ctype* restrict beta, \
ctype* restrict rho, \
cntx_t* cntx \
) \
{ \
ctype* chi1; \
ctype* psi1; \
ctype* restrict chi1; \
ctype* restrict psi1; \
ctype dotxy; \
dim_t i; \
conj_t conjx_use; \
@@ -83,22 +83,42 @@ void PASTEMAC(ch,varname) \
\
if ( bli_is_conj( conjx_use ) ) \
{ \
for ( i = 0; i < n; ++i ) \
if ( incx == 1 && incy == 1 ) \
{ \
PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,dotjs)( chi1[i], psi1[i], dotxy ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \
\
chi1 += incx; \
psi1 += incy; \
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
if ( incx == 1 && incy == 1 ) \
{ \
PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,dots)( chi1[i], psi1[i], dotxy ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \
\
chi1 += incx; \
psi1 += incy; \
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
\

View File

@@ -39,23 +39,33 @@
\
void PASTEMAC(ch,varname) \
( \
dim_t n, \
ctype* x, inc_t incx, \
cntx_t* cntx \
dim_t n, \
ctype* restrict x, inc_t incx, \
cntx_t* cntx \
) \
{ \
ctype* chi1; \
ctype* restrict chi1; \
dim_t i; \
\
if ( bli_zero_dim1( n ) ) return; \
\
chi1 = x; \
\
for ( i = 0; i < n; ++i ) \
if ( incx == 1 ) \
{ \
PASTEMAC(ch,inverts)( *chi1 ); \
\
chi1 += incx; \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,inverts)( chi1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,inverts)( *chi1 ); \
\
chi1 += incx; \
} \
} \
}

View File

@@ -32,116 +32,48 @@
*/
// Redefine level-1v kernel API names to induce prototypes.
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
conj_t conjx, \
dim_t n, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
cntx_t* cntx \
);
#undef addv_ker_name
#define addv_ker_name addv_ref
INSERT_GENTPROT_BASIC( addv_ref )
INSERT_GENTPROT_BASIC( copyv_ref )
INSERT_GENTPROT_BASIC( subv_ref )
#undef axpbyv_ker_name
#define axpbyv_ker_name axpbyv_ref
#undef axpyv_ker_name
#define axpyv_ker_name axpyv_ref
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
conj_t conjx, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
cntx_t* cntx \
);
#undef copyv_ker_name
#define copyv_ker_name copyv_ref
INSERT_GENTPROT_BASIC( axpyv_ref )
INSERT_GENTPROT_BASIC( scal2v_ref )
#undef dotv_ker_name
#define dotv_ker_name dotv_ref
#undef dotxv_ker_name
#define dotxv_ker_name dotxv_ref
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* rho, \
cntx_t* cntx \
);
#undef invertv_ker_name
#define invertv_ker_name invertv_ref
INSERT_GENTPROT_BASIC( dotv_ref )
#undef scalv_ker_name
#define scalv_ker_name scalv_ref
#undef scal2v_ker_name
#define scal2v_ker_name scal2v_ref
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* beta, \
ctype* rho, \
cntx_t* cntx \
);
#undef setv_ker_name
#define setv_ker_name setv_ref
INSERT_GENTPROT_BASIC( dotxv_ref )
#undef subv_ker_name
#define subv_ker_name subv_ref
#undef swapv_ker_name
#define swapv_ker_name swapv_ref
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
dim_t n, \
ctype* x, inc_t incx, \
cntx_t* cntx \
);
#undef xpbyv_ker_name
#define xpbyv_ker_name xpbyv_ref
INSERT_GENTPROT_BASIC( invertv_ref )
// Include the level-1v kernel API template.
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
conj_t conjalpha, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t incx, \
cntx_t* cntx \
);
INSERT_GENTPROT_BASIC( scalv_ref )
INSERT_GENTPROT_BASIC( setv_ref )
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
dim_t n, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
cntx_t* cntx \
);
INSERT_GENTPROT_BASIC( swapv_ref )
#include "bli_l1v_ker.h"

View File

@@ -39,16 +39,16 @@
\
void PASTEMAC(ch,varname) \
( \
conj_t conjx, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
cntx_t* cntx \
conj_t conjx, \
dim_t n, \
ctype* restrict alpha, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
) \
{ \
ctype* chi1; \
ctype* psi1; \
ctype* restrict chi1; \
ctype* restrict psi1; \
dim_t i; \
\
if ( bli_zero_dim1( n ) ) return; \
@@ -72,28 +72,65 @@ void PASTEMAC(ch,varname) \
); \
return; \
} \
/* If alpha is one, scal2v (y := alpha * conjx(x)) reduces to
   y := conjx(x), so defer to the copyv kernel. The test must be
   eq1, not eq0: copying x into y when alpha is zero would produce
   the wrong result. */ \
else if ( PASTEMAC(ch,eq1)( *alpha ) ) \
{ \
/* Query the context for the kernel function pointer. */ \
const num_t dt = PASTEMAC(ch,type); \
PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \
\
/* Forward conjx so that a requested conjugation of x is still
   applied by the copy, matching the scal2js path below. */ \
copyv_p \
( \
conjx, \
n, \
x, incx, \
y, incy, \
cntx \
); \
return; \
} \
\
chi1 = x; \
psi1 = y; \
\
if ( bli_is_conj( conjx ) ) \
{ \
for ( i = 0; i < n; ++i ) \
if ( incx == 1 && incy == 1 ) \
{ \
PASTEMAC(ch,scal2js)( *alpha, *chi1, *psi1 ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,scal2js)( *alpha, chi1[i], psi1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,scal2js)( *alpha, *chi1, *psi1 ); \
\
chi1 += incx; \
psi1 += incy; \
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
if ( incx == 1 && incy == 1 ) \
{ \
PASTEMAC(ch,scal2s)( *alpha, *chi1, *psi1 ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,scal2s)( *alpha, chi1[i], psi1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,scal2s)( *alpha, *chi1, *psi1 ); \
\
chi1 += incx; \
psi1 += incy; \
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
}

View File

@@ -39,14 +39,14 @@
\
void PASTEMAC(ch,varname) \
( \
conj_t conjalpha, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t incx, \
cntx_t* cntx \
conj_t conjalpha, \
dim_t n, \
ctype* restrict alpha, \
ctype* restrict x, inc_t incx, \
cntx_t* cntx \
) \
{ \
ctype* chi1; \
ctype* restrict chi1; \
ctype alpha_conj; \
dim_t i; \
\
@@ -79,11 +79,21 @@ void PASTEMAC(ch,varname) \
\
chi1 = x; \
\
for ( i = 0; i < n; ++i ) \
if ( incx == 1 ) \
{ \
PASTEMAC(ch,scals)( alpha_conj, *chi1 ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,scals)( alpha_conj, chi1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,scals)( alpha_conj, *chi1 ); \
\
chi1 += incx; \
chi1 += incx; \
} \
} \
}

View File

@@ -39,14 +39,14 @@
\
void PASTEMAC(ch,varname) \
( \
conj_t conjalpha, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t incx, \
cntx_t* cntx \
conj_t conjalpha, \
dim_t n, \
ctype* restrict alpha, \
ctype* restrict x, inc_t incx, \
cntx_t* cntx \
) \
{ \
ctype* chi1; \
ctype* restrict chi1; \
ctype alpha_conj; \
dim_t i; \
\
@@ -56,22 +56,42 @@ void PASTEMAC(ch,varname) \
\
if ( PASTEMAC(ch,eq0)( *alpha ) ) \
{ \
for ( i = 0; i < n; ++i ) \
if ( incx == 1 ) \
{ \
PASTEMAC(ch,set0s)( *chi1 ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,set0s)( chi1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,set0s)( *chi1 ); \
\
chi1 += incx; \
chi1 += incx; \
} \
} \
} \
else \
{ \
PASTEMAC(ch,copycjs)( conjalpha, *alpha, alpha_conj ); \
\
for ( i = 0; i < n; ++i ) \
if ( incx == 1 ) \
{ \
PASTEMAC(ch,copys)( alpha_conj, *chi1 ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,copys)( alpha_conj, chi1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,copys)( alpha_conj, *chi1 ); \
\
chi1 += incx; \
chi1 += incx; \
} \
} \
} \
}

View File

@@ -39,15 +39,15 @@
\
void PASTEMAC(ch,varname) \
( \
conj_t conjx, \
dim_t n, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
cntx_t* cntx \
conj_t conjx, \
dim_t n, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
) \
{ \
ctype* chi1; \
ctype* psi1; \
ctype* restrict chi1; \
ctype* restrict psi1; \
dim_t i; \
\
if ( bli_zero_dim1( n ) ) return; \
@@ -57,22 +57,42 @@ void PASTEMAC(ch,varname) \
\
if ( bli_is_conj( conjx ) ) \
{ \
for ( i = 0; i < n; ++i ) \
if ( incx == 1 && incy == 1 ) \
{ \
PASTEMAC(ch,subjs)( *chi1, *psi1 ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,subjs)( chi1[i], psi1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,subjs)( *chi1, *psi1 ); \
\
chi1 += incx; \
psi1 += incy; \
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
if ( incx == 1 && incy == 1 ) \
{ \
PASTEMAC(ch,subs)( *chi1, *psi1 ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,subs)( chi1[i], psi1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,subs)( *chi1, *psi1 ); \
\
chi1 += incx; \
psi1 += incy; \
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
}

View File

@@ -39,14 +39,14 @@
\
void PASTEMAC(ch,varname) \
( \
dim_t n, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
cntx_t* cntx \
dim_t n, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
) \
{ \
ctype* chi1; \
ctype* psi1; \
ctype* restrict chi1; \
ctype* restrict psi1; \
dim_t i; \
\
if ( bli_zero_dim1( n ) ) return; \
@@ -54,12 +54,22 @@ void PASTEMAC(ch,varname) \
chi1 = x; \
psi1 = y; \
\
for ( i = 0; i < n; ++i ) \
if ( incx == 1 && incy == 1 ) \
{ \
PASTEMAC(ch,swaps)( *chi1, *psi1 ); \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,swaps)( chi1[i], psi1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,swaps)( *chi1, *psi1 ); \
\
chi1 += incx; \
psi1 += incy; \
chi1 += incx; \
psi1 += incy; \
} \
} \
}

View File

@@ -0,0 +1,137 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
/* Reference implementation template for the xpbyv level-1v kernel.
   GENTFUNC expands to one function per (ctype, ch) pair. Judging by the
   beta == 0 -> copyv and beta == 1 -> addv shortcuts below, the operation
   is presumably y := conjx(x) + beta * y -- confirm against the
   xpbys/xpbyjs scalar macro definitions.

   Parameters:
     conjx       - whether x is to be conjugated (selects xpbyjs vs xpbys).
     n           - number of elements processed.
     x, incx     - input vector and its element stride.
     beta        - pointer to the scalar passed to xpbys/xpbyjs.
     y, incy     - vector updated in place, and its element stride.
     cntx        - context queried for the copyv/addv kernel pointers. */
#undef GENTFUNC
#define GENTFUNC( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
conj_t conjx, \
dim_t n, \
ctype* restrict x, inc_t incx, \
ctype* restrict beta, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
) \
{ \
ctype* restrict chi1; \
ctype* restrict psi1; \
dim_t i; \
\
/* Nothing to do for an empty vector. */ \
if ( bli_zero_dim1( n ) ) return; \
\
/* If beta is zero, use copyv. */ \
if ( PASTEMAC(ch,eq0)( *beta ) ) \
{ \
/* Query the context for the kernel function pointer. */ \
const num_t dt = PASTEMAC(ch,type); \
PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \
\
copyv_p \
( \
conjx, \
n, \
x, incx, \
y, incy, \
cntx \
); \
return; \
} \
/* If beta is one, use addv. */ \
else if ( PASTEMAC(ch,eq1)( *beta ) ) \
{ \
/* Query the context for the kernel function pointer. */ \
const num_t dt = PASTEMAC(ch,type); \
PASTECH(ch,addv_ft) addv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_ADDV_KER, cntx ); \
\
addv_p \
( \
conjx, \
n, \
x, incx, \
y, incy, \
cntx \
); \
return; \
} \
\
/* General beta: walk both vectors element by element. */ \
chi1 = x; \
psi1 = y; \
\
if ( bli_is_conj( conjx ) ) \
{ \
/* Both strides are one: address elements directly by index. */ \
if ( incx == 1 && incy == 1 ) \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,xpbyjs)( chi1[i], *beta, psi1[i] ); \
} \
} \
else \
{ \
/* General strides: advance each pointer by its own increment. */ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,xpbyjs)( *chi1, *beta, *psi1 ); \
\
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
else \
{ \
/* Both strides are one: address elements directly by index. */ \
if ( incx == 1 && incy == 1 ) \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,xpbys)( chi1[i], *beta, psi1[i] ); \
} \
} \
else \
{ \
/* General strides: advance each pointer by its own increment. */ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC(ch,xpbys)( *chi1, *beta, *psi1 ); \
\
chi1 += incx; \
psi1 += incy; \
} \
} \
} \
}
/* Instantiate the template above under the name xpbyv_ref (presumably
   once per basic datatype -- see the INSERT_GENTFUNC_BASIC0 definition). */
INSERT_GENTFUNC_BASIC0( xpbyv_ref )

View File

@@ -34,7 +34,7 @@
#include "blis.h"
packv_t* packv_cntl;
packv_t* packv_cntl = NULL;
void bli_packv_cntl_init( void )
{

View File

@@ -34,7 +34,7 @@
#include "blis.h"
scalv_t* scalv_cntl;
scalv_t* scalv_cntl = NULL;
void bli_scalv_cntl_init()
{

View File

@@ -34,7 +34,7 @@
#include "blis.h"
unpackv_t* unpackv_cntl;
unpackv_t* unpackv_cntl = NULL;
void bli_unpackv_cntl_init()
{

View File

@@ -50,11 +50,11 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* alpha1, \
ctype* alpha2, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* z, inc_t incz, \
ctype* restrict alpha1, \
ctype* restrict alpha2, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
ctype* restrict z, inc_t incz, \
cntx_t* cntx \
);
@@ -71,10 +71,10 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* alpha, \
ctype* a, inc_t inca, inc_t lda, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* restrict alpha, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
);
@@ -91,11 +91,11 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
conj_t conjx, \
conj_t conjy, \
dim_t m, \
ctype* alpha, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* rho, \
ctype* z, inc_t incz, \
ctype* restrict alpha, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
ctype* restrict rho, \
ctype* restrict z, inc_t incz, \
cntx_t* cntx \
);
@@ -112,11 +112,11 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* alpha, \
ctype* a, inc_t inca, inc_t lda, \
ctype* x, inc_t incx, \
ctype* beta, \
ctype* y, inc_t incy, \
ctype* restrict alpha, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict x, inc_t incx, \
ctype* restrict beta, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
);
@@ -135,13 +135,13 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* alpha, \
ctype* a, inc_t inca, inc_t lda, \
ctype* w, inc_t incw, \
ctype* x, inc_t incx, \
ctype* beta, \
ctype* y, inc_t incy, \
ctype* z, inc_t incz, \
ctype* restrict alpha, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict w, inc_t incw, \
ctype* restrict x, inc_t incx, \
ctype* restrict beta, \
ctype* restrict y, inc_t incy, \
ctype* restrict z, inc_t incz, \
cntx_t* cntx \
);

View File

@@ -42,15 +42,15 @@
\
void PASTEMAC(ch,opname) \
( \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* alphax, \
ctype* alphay, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* z, inc_t incz, \
cntx_t* cntx \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* restrict alphax, \
ctype* restrict alphay, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
ctype* restrict z, inc_t incz, \
cntx_t* cntx \
);
INSERT_GENTPROT_BASIC( axpy2v_ker_name )
@@ -61,15 +61,15 @@ INSERT_GENTPROT_BASIC( axpy2v_ker_name )
\
void PASTEMAC(ch,opname) \
( \
conj_t conja, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* alpha, \
ctype* a, inc_t inca, inc_t lda, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
cntx_t* cntx \
conj_t conja, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* restrict alpha, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
);
INSERT_GENTPROT_BASIC( axpyf_ker_name )
@@ -80,16 +80,16 @@ INSERT_GENTPROT_BASIC( axpyf_ker_name )
\
void PASTEMAC(ch,opname) \
( \
conj_t conjxt, \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* rho, \
ctype* z, inc_t incz, \
cntx_t* cntx \
conj_t conjxt, \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* restrict alpha, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
ctype* restrict rho, \
ctype* restrict z, inc_t incz, \
cntx_t* cntx \
);
INSERT_GENTPROT_BASIC( dotaxpyv_ker_name )
@@ -100,20 +100,20 @@ INSERT_GENTPROT_BASIC( dotaxpyv_ker_name )
\
void PASTEMAC(ch,opname) \
( \
conj_t conjat, \
conj_t conja, \
conj_t conjw, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* alpha, \
ctype* a, inc_t inca, inc_t lda, \
ctype* w, inc_t incw, \
ctype* x, inc_t incx, \
ctype* beta, \
ctype* y, inc_t incy, \
ctype* z, inc_t incz, \
cntx_t* cntx \
conj_t conjat, \
conj_t conja, \
conj_t conjw, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* restrict alpha, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict w, inc_t incw, \
ctype* restrict x, inc_t incx, \
ctype* restrict beta, \
ctype* restrict y, inc_t incy, \
ctype* restrict z, inc_t incz, \
cntx_t* cntx \
);
INSERT_GENTPROT_BASIC( dotxaxpyf_ker_name )
@@ -124,16 +124,16 @@ INSERT_GENTPROT_BASIC( dotxaxpyf_ker_name )
\
void PASTEMAC(ch,opname) \
( \
conj_t conjat, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* alpha, \
ctype* a, inc_t inca, inc_t lda, \
ctype* x, inc_t incx, \
ctype* beta, \
ctype* y, inc_t incy, \
cntx_t* cntx \
conj_t conjat, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* restrict alpha, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict x, inc_t incx, \
ctype* restrict beta, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
);
INSERT_GENTPROT_BASIC( dotxf_ker_name )

View File

@@ -40,15 +40,15 @@
\
void PASTEMAC(ch,varname) \
( \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* alphax, \
ctype* alphay, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* z, inc_t incz, \
cntx_t* cntx \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* restrict alphax, \
ctype* restrict alphay, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
ctype* restrict z, inc_t incz, \
cntx_t* cntx \
) \
{ \
/* Query the context for the kernel function pointer. */ \

View File

@@ -40,15 +40,15 @@
\
void PASTEMAC(ch,varname) \
( \
conj_t conja, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* alpha, \
ctype* a, inc_t inca, inc_t lda, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
cntx_t* cntx \
conj_t conja, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* restrict alpha, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
) \
{ \
ctype* a1; \

View File

@@ -40,16 +40,16 @@
\
void PASTEMAC(ch,varname) \
( \
conj_t conjxt, \
conj_t conjx, \
conj_t conjy, \
dim_t m, \
ctype* alpha, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* rho, \
ctype* z, inc_t incz, \
cntx_t* cntx \
conj_t conjxt, \
conj_t conjx, \
conj_t conjy, \
dim_t m, \
ctype* restrict alpha, \
ctype* restrict x, inc_t incx, \
ctype* restrict y, inc_t incy, \
ctype* restrict rho, \
ctype* restrict z, inc_t incz, \
cntx_t* cntx \
) \
{ \
ctype* one = PASTEMAC(ch,1); \

View File

@@ -40,20 +40,20 @@
\
void PASTEMAC(ch,varname) \
( \
conj_t conjat, \
conj_t conja, \
conj_t conjw, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* alpha, \
ctype* a, inc_t inca, inc_t lda, \
ctype* w, inc_t incw, \
ctype* x, inc_t incx, \
ctype* beta, \
ctype* y, inc_t incy, \
ctype* z, inc_t incz, \
cntx_t* cntx \
conj_t conjat, \
conj_t conja, \
conj_t conjw, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* restrict alpha, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict w, inc_t incw, \
ctype* restrict x, inc_t incx, \
ctype* restrict beta, \
ctype* restrict y, inc_t incy, \
ctype* restrict z, inc_t incz, \
cntx_t* cntx \
) \
{ \
/* A is m x n. */ \

View File

@@ -40,16 +40,16 @@
\
void PASTEMAC(ch,varname) \
( \
conj_t conjat, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* alpha, \
ctype* a, inc_t inca, inc_t lda, \
ctype* x, inc_t incx, \
ctype* beta, \
ctype* y, inc_t incy, \
cntx_t* cntx \
conj_t conjat, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* restrict alpha, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict x, inc_t incx, \
ctype* restrict beta, \
ctype* restrict y, inc_t incy, \
cntx_t* cntx \
) \
{ \
ctype* a1; \

View File

@@ -32,129 +32,24 @@
*/
// Redefine level-1f kernel API names to induce prototypes.
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
conj_t conjx, \
conj_t conjy, \
dim_t n, \
ctype* alpha1, \
ctype* alpha2, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* z, inc_t incz, \
cntx_t* cntx \
);
#undef axpy2v_ker_name
#define axpy2v_ker_name axpy2v_ref
INSERT_GENTPROT_BASIC( axpy2v_ref )
#undef dotaxpyv_ker_name
#define dotaxpyv_ker_name dotaxpyv_ref
#undef axpyf_ker_name
#define axpyf_ker_name axpyf_ref
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
conj_t conja, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* alpha, \
ctype* a, inc_t inca, inc_t lda, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
cntx_t* cntx \
);
#undef dotxf_ker_name
#define dotxf_ker_name dotxf_ref
INSERT_GENTPROT_BASIC( axpyf_ref )
#undef dotxaxpyf_ker_name
#define dotxaxpyf_ker_name dotxaxpyf_ref_var2
// Include the level-1f kernel API template.
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
conj_t conjxt, \
conj_t conjx, \
conj_t conjy, \
dim_t m, \
ctype* alpha, \
ctype* x, inc_t incx, \
ctype* y, inc_t incy, \
ctype* rho, \
ctype* z, inc_t incz, \
cntx_t* cntx \
);
INSERT_GENTPROT_BASIC( dotaxpyv_ref )
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
conj_t conjat, \
conj_t conja, \
conj_t conjw, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* alpha, \
ctype* a, inc_t inca, inc_t lda, \
ctype* w, inc_t incw, \
ctype* x, inc_t incx, \
ctype* beta, \
ctype* y, inc_t incy, \
ctype* z, inc_t incz, \
cntx_t* cntx \
);
INSERT_GENTPROT_BASIC( dotxaxpyf_ref_var1 )
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
conj_t conjat, \
conj_t conja, \
conj_t conjw, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* alpha, \
ctype* a, inc_t inca, inc_t lda, \
ctype* w, inc_t incw, \
ctype* x, inc_t incx, \
ctype* beta, \
ctype* y, inc_t incy, \
ctype* z, inc_t incz, \
cntx_t* cntx \
);
INSERT_GENTPROT_BASIC( dotxaxpyf_ref_var2 )
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname) \
( \
conj_t conjat, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* alpha, \
ctype* a, inc_t inca, inc_t lda, \
ctype* x, inc_t incx, \
ctype* beta, \
ctype* y, inc_t incy, \
cntx_t* cntx \
);
INSERT_GENTPROT_BASIC( dotxf_ref )
#include "bli_l1f_ker.h"

View File

@@ -40,20 +40,20 @@
\
void PASTEMAC(ch,varname) \
( \
conj_t conjat, \
conj_t conja, \
conj_t conjw, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* alpha, \
ctype* a, inc_t inca, inc_t lda, \
ctype* w, inc_t incw, \
ctype* x, inc_t incx, \
ctype* beta, \
ctype* y, inc_t incy, \
ctype* z, inc_t incz, \
cntx_t* cntx \
conj_t conjat, \
conj_t conja, \
conj_t conjw, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype* restrict alpha, \
ctype* restrict a, inc_t inca, inc_t lda, \
ctype* restrict w, inc_t incw, \
ctype* restrict x, inc_t incx, \
ctype* restrict beta, \
ctype* restrict y, inc_t incy, \
ctype* restrict z, inc_t incz, \
cntx_t* cntx \
) \
{ \
ctype* a1; \

View File

@@ -34,10 +34,10 @@
#include "blis.h"
packm_t* packm_cntl_row;
packm_t* packm_cntl_col;
packm_t* packm_cntl_row = NULL;
packm_t* packm_cntl_col = NULL;
packm_t* packm_cntl;
packm_t* packm_cntl = NULL;
void bli_packm_cntl_init()
{

View File

@@ -34,7 +34,7 @@
#include "blis.h"
scalm_t* scalm_cntl;
scalm_t* scalm_cntl = NULL;
void bli_scalm_cntl_init()
{

View File

@@ -34,7 +34,7 @@
#include "blis.h"
unpackm_t* unpackm_cntl;
unpackm_t* unpackm_cntl = NULL;
void bli_unpackm_cntl_init()
{

View File

@@ -39,17 +39,17 @@ extern packm_t* packm_cntl;
extern packv_t* packv_cntl;
extern unpackv_t* unpackv_cntl;
gemv_t* gemv_cntl_bs_ke_dot;
gemv_t* gemv_cntl_bs_ke_axpy;
gemv_t* gemv_cntl_bs_ke_dot = NULL;
gemv_t* gemv_cntl_bs_ke_axpy = NULL;
gemv_t* gemv_cntl_rp_bs_dot;
gemv_t* gemv_cntl_rp_bs_axpy;
gemv_t* gemv_cntl_rp_bs_dot = NULL;
gemv_t* gemv_cntl_rp_bs_axpy = NULL;
gemv_t* gemv_cntl_cp_bs_dot;
gemv_t* gemv_cntl_cp_bs_axpy;
gemv_t* gemv_cntl_cp_bs_dot = NULL;
gemv_t* gemv_cntl_cp_bs_axpy = NULL;
gemv_t* gemv_cntl_ge_dot;
gemv_t* gemv_cntl_ge_axpy;
gemv_t* gemv_cntl_ge_dot = NULL;
gemv_t* gemv_cntl_ge_axpy = NULL;
void bli_gemv_cntl_init()

View File

@@ -38,17 +38,17 @@ extern packm_t* packm_cntl;
extern packv_t* packv_cntl;
extern unpackm_t* unpackm_cntl;
ger_t* ger_cntl_bs_ke_row;
ger_t* ger_cntl_bs_ke_col;
ger_t* ger_cntl_bs_ke_row = NULL;
ger_t* ger_cntl_bs_ke_col = NULL;
ger_t* ger_cntl_rp_bs_row;
ger_t* ger_cntl_rp_bs_col;
ger_t* ger_cntl_rp_bs_row = NULL;
ger_t* ger_cntl_rp_bs_col = NULL;
ger_t* ger_cntl_cp_bs_row;
ger_t* ger_cntl_cp_bs_col;
ger_t* ger_cntl_cp_bs_row = NULL;
ger_t* ger_cntl_cp_bs_col = NULL;
ger_t* ger_cntl_ge_row;
ger_t* ger_cntl_ge_col;
ger_t* ger_cntl_ge_row = NULL;
ger_t* ger_cntl_ge_col = NULL;
void bli_ger_cntl_init()

View File

@@ -44,10 +44,10 @@ extern gemv_t* gemv_cntl_rp_bs_axpy;
extern gemv_t* gemv_cntl_cp_bs_dot;
extern gemv_t* gemv_cntl_cp_bs_axpy;
hemv_t* hemv_cntl_bs_ke_lrow_ucol;
hemv_t* hemv_cntl_bs_ke_lcol_urow;
hemv_t* hemv_cntl_ge_lrow_ucol;
hemv_t* hemv_cntl_ge_lcol_urow;
hemv_t* hemv_cntl_bs_ke_lrow_ucol = NULL;
hemv_t* hemv_cntl_bs_ke_lcol_urow = NULL;
hemv_t* hemv_cntl_ge_lrow_ucol = NULL;
hemv_t* hemv_cntl_ge_lcol_urow = NULL;
void bli_hemv_cntl_init()

View File

@@ -43,11 +43,11 @@ extern ger_t* ger_cntl_cp_bs_col;
extern ger_t* ger_cntl_bs_ke_row;
extern ger_t* ger_cntl_bs_ke_col;
her_t* her_cntl_bs_ke_lrow_ucol;
her_t* her_cntl_bs_ke_lcol_urow;
her_t* her_cntl_bs_ke_lrow_ucol = NULL;
her_t* her_cntl_bs_ke_lcol_urow = NULL;
her_t* her_cntl_ge_lrow_ucol;
her_t* her_cntl_ge_lcol_urow;
her_t* her_cntl_ge_lrow_ucol = NULL;
her_t* her_cntl_ge_lcol_urow = NULL;
void bli_her_cntl_init()

View File

@@ -41,11 +41,11 @@ extern unpackm_t* unpackm_cntl;
extern ger_t* ger_cntl_rp_bs_row;
extern ger_t* ger_cntl_cp_bs_col;
her2_t* her2_cntl_bs_ke_lrow_ucol;
her2_t* her2_cntl_bs_ke_lcol_urow;
her2_t* her2_cntl_bs_ke_lrow_ucol = NULL;
her2_t* her2_cntl_bs_ke_lcol_urow = NULL;
her2_t* her2_cntl_ge_lrow_ucol;
her2_t* her2_cntl_ge_lcol_urow;
her2_t* her2_cntl_ge_lrow_ucol = NULL;
her2_t* her2_cntl_ge_lcol_urow = NULL;
void bli_her2_cntl_init()

View File

@@ -43,10 +43,10 @@ extern gemv_t* gemv_cntl_rp_bs_axpy;
extern gemv_t* gemv_cntl_cp_bs_dot;
extern gemv_t* gemv_cntl_cp_bs_axpy;
trmv_t* trmv_cntl_bs_ke_nrow_tcol;
trmv_t* trmv_cntl_bs_ke_ncol_trow;
trmv_t* trmv_cntl_ge_nrow_tcol;
trmv_t* trmv_cntl_ge_ncol_trow;
trmv_t* trmv_cntl_bs_ke_nrow_tcol = NULL;
trmv_t* trmv_cntl_bs_ke_ncol_trow = NULL;
trmv_t* trmv_cntl_ge_nrow_tcol = NULL;
trmv_t* trmv_cntl_ge_ncol_trow = NULL;
void bli_trmv_cntl_init()

View File

@@ -44,10 +44,10 @@ extern gemv_t* gemv_cntl_rp_bs_axpy;
extern gemv_t* gemv_cntl_cp_bs_dot;
extern gemv_t* gemv_cntl_cp_bs_axpy;
trsv_t* trsv_cntl_bs_ke_nrow_tcol;
trsv_t* trsv_cntl_bs_ke_ncol_trow;
trsv_t* trsv_cntl_ge_nrow_tcol;
trsv_t* trsv_cntl_ge_ncol_trow;
trsv_t* trsv_cntl_bs_ke_nrow_tcol = NULL;
trsv_t* trsv_cntl_bs_ke_ncol_trow = NULL;
trsv_t* trsv_cntl_ge_nrow_tcol = NULL;
trsv_t* trsv_cntl_ge_ncol_trow = NULL;
void bli_trsv_cntl_init()

View File

@@ -40,9 +40,8 @@
void bli_gemm_cntx_init( cntx_t* cntx )
{
bli_cntx_obj_create( cntx );
//bli_cntx_obj_clear( cntx );
// Clear the context fields.
bli_cntx_obj_clear( cntx );
// Initialize the context with the current architecture's native
// level-3 gemm micro-kernel, and its output preferences.
@@ -71,16 +70,14 @@ void bli_gemm_cntx_init( cntx_t* cntx )
void bli_gemm_cntx_finalize( cntx_t* cntx )
{
bli_cntx_obj_free( cntx );
}
// -----------------------------------------------------------------------------
void bli_trsm_cntx_init( cntx_t* cntx )
{
bli_cntx_obj_create( cntx );
//bli_cntx_obj_clear( cntx );
// Clear the context fields.
bli_cntx_obj_clear( cntx );
// Initialize the context with the current architecture's native
// level-3 gemm micro-kernel, and its output preferences.
@@ -116,6 +113,5 @@ void bli_trsm_cntx_init( cntx_t* cntx )
void bli_trsm_cntx_finalize( cntx_t* cntx )
{
bli_cntx_obj_free( cntx );
}

View File

@@ -36,15 +36,15 @@
extern scalm_t* scalm_cntl;
packm_t* gemm_packa_cntl;
packm_t* gemm_packb_cntl;
packm_t* gemm_packa_cntl = NULL;
packm_t* gemm_packb_cntl = NULL;
gemm_t* gemm_cntl_bp_ke;
gemm_t* gemm_cntl_op_bp;
gemm_t* gemm_cntl_mm_op;
gemm_t* gemm_cntl_vl_mm;
gemm_t* gemm_cntl_bp_ke = NULL;
gemm_t* gemm_cntl_op_bp = NULL;
gemm_t* gemm_cntl_mm_op = NULL;
gemm_t* gemm_cntl_vl_mm = NULL;
gemm_t* gemm_cntl;
gemm_t* gemm_cntl = NULL;
void bli_gemm_cntl_init()
{

View File

@@ -322,9 +322,11 @@ void PASTEMAC(ch,varname) \
} \
} \
\
/*PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: c", MR, NR, c11, rs_c, cs_c, "%4.1f", "" );*/ \
/*PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: b1", k, NR, b1, NR, 1, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: a1", MR, k, a1, 1, MR, "%4.1f", "" );*/ \
/*
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: b1", k, NR, b1, NR, 1, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: a1", MR, k, a1, 1, MR, "%4.1f", "" ); \
PASTEMAC(ch,fprintm)( stdout, "gemm_ker_var2: c after", m_cur, n_cur, c11, rs_c, cs_c, "%4.1f", "" ); \
*/ \
}
INSERT_GENTFUNC_BASIC0( gemm_ker_var2 )

View File

@@ -38,24 +38,24 @@ extern scalm_t* scalm_cntl;
extern gemm_t* gemm_cntl_bp_ke;
packm_t* trsm_l_packa_cntl;
packm_t* trsm_l_packb_cntl;
packm_t* trsm_l_packa_cntl = NULL;
packm_t* trsm_l_packb_cntl = NULL;
packm_t* trsm_r_packa_cntl;
packm_t* trsm_r_packb_cntl;
packm_t* trsm_r_packa_cntl = NULL;
packm_t* trsm_r_packb_cntl = NULL;
trsm_t* trsm_cntl_bp_ke;
trsm_t* trsm_cntl_bp_ke = NULL;
trsm_t* trsm_l_cntl_op_bp;
trsm_t* trsm_l_cntl_mm_op;
trsm_t* trsm_l_cntl_vl_mm;
trsm_t* trsm_l_cntl_op_bp = NULL;
trsm_t* trsm_l_cntl_mm_op = NULL;
trsm_t* trsm_l_cntl_vl_mm = NULL;
trsm_t* trsm_r_cntl_op_bp;
trsm_t* trsm_r_cntl_mm_op;
trsm_t* trsm_r_cntl_vl_mm;
trsm_t* trsm_r_cntl_op_bp = NULL;
trsm_t* trsm_r_cntl_mm_op = NULL;
trsm_t* trsm_r_cntl_vl_mm = NULL;
trsm_t* trsm_l_cntl;
trsm_t* trsm_r_cntl;
trsm_t* trsm_l_cntl = NULL;
trsm_t* trsm_r_cntl = NULL;
void bli_trsm_cntl_init()

View File

@@ -56,59 +56,16 @@ void bli_cntx_obj_free( cntx_t* cntx )
// Reset a context object: every block size, block-size multiple, kernel
// function table entry and kernel preference is set to a null value, and
// the structure itself is then zero-filled.
//
//   cntx - the context to clear; it is dereferenced unconditionally, so
//          it must point to a valid cntx_t.
void bli_cntx_obj_clear( cntx_t* cntx )
{
	blksz_t* blkszs = bli_cntx_blkszs_buf( cntx );
	bszid_t* bmults = bli_cntx_bmults_buf( cntx );
	func_t* l3_vir_ukrs = bli_cntx_l3_vir_ukrs_buf( cntx );
	func_t* l3_nat_ukrs = bli_cntx_l3_nat_ukrs_buf( cntx );
	mbool_t* l3_nat_ukrs_prefs = bli_cntx_l3_nat_ukrs_prefs_buf( cntx );
	func_t* l1f_kers = bli_cntx_l1f_kers_buf( cntx );
	func_t* l1v_kers = bli_cntx_l1v_kers_buf( cntx );
	func_t* packm_ukrs = bli_cntx_packm_ukrs_buf( cntx );
	dim_t i;
	// Initialize all of the elements of every array to a sane initial
	// value. (Strictly speaking, there is no "null" value for typedef'ed
	// enums such as bszid_t, so we cheat a little by using 0.)
	func_t null_func = { { NULL, NULL, NULL, NULL } };
	blksz_t null_blksz = { { 0, 0, 0, 0, } };
	mbool_t null_mbool = { { FALSE, FALSE, FALSE, FALSE } };
	bszid_t null_bszid = 0;
	for ( i = 0; i < BLIS_NUM_BLKSZS; ++i )
	{
		blkszs[ i ] = null_blksz;
	}
	for ( i = 0; i < BLIS_NUM_BLKSZS; ++i )
	{
		bmults[ i ] = null_bszid;
	}
	for ( i = 0; i < BLIS_NUM_LEVEL3_UKRS; ++i )
	{
		l3_vir_ukrs[ i ] = null_func;
		l3_nat_ukrs[ i ] = null_func;
		l3_nat_ukrs_prefs[ i ] = null_mbool;
	}
	for ( i = 0; i < BLIS_NUM_LEVEL1F_KERS; ++i )
	{
		l1f_kers[ i ] = null_func;
	}
	for ( i = 0; i < BLIS_NUM_LEVEL1V_KERS; ++i )
	{
		l1v_kers[ i ] = null_func;
	}
	{
		packm_ukrs[ 0 ] = null_func;
	}
	// NOTE: It doesn't make sense to initialize method or schema fields
	// at this time; the method field would normally be set by _set_blkszs()
	// and the schema fields are set by _set_pack_schema_[abc]().
	// Fill the entire cntx_t structure with zeros. The size must be that of
	// the pointed-to object: sizeof( cntx ) would only be the size of the
	// pointer, leaving all but the first few bytes of the context untouched.
	memset( ( void* )cntx, 0, sizeof( *cntx ) );
}
void bli_cntx_init( cntx_t* cntx )
{
// This function initializes a "universal" context that is pre-loaded
// with kernel addresses for all level-1v, -1f, and -3 kernels, in
// addition to all level-1f and -3 blocksizes.
bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMM_UKR, cntx );
bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMMTRSM_L_UKR, cntx );
bli_gks_cntx_set_l3_nat_ukr( BLIS_GEMMTRSM_U_UKR, cntx );
@@ -161,6 +118,7 @@ blksz_t* bli_cntx_get_blksz( bszid_t bs_id,
return blksz;
}
#if 0
dim_t bli_cntx_get_blksz_def_dt( num_t dt,
bszid_t bs_id,
cntx_t* cntx )
@@ -182,6 +140,7 @@ dim_t bli_cntx_get_blksz_max_dt( num_t dt,
// Return the maximum blocksize value for the datatype given.
return bli_blksz_get_max( dt, blksz );
}
#endif
blksz_t* bli_cntx_get_bmult( bszid_t bs_id,
cntx_t* cntx )
@@ -196,6 +155,7 @@ blksz_t* bli_cntx_get_bmult( bszid_t bs_id,
return bmult;
}
#if 0
dim_t bli_cntx_get_bmult_dt( num_t dt,
bszid_t bs_id,
cntx_t* cntx )
@@ -203,21 +163,8 @@ dim_t bli_cntx_get_bmult_dt( num_t dt,
blksz_t* bmult = bli_cntx_get_bmult( bs_id, cntx );
return bli_blksz_get_def( dt, bmult );
#if 0
blksz_t* blkszs = bli_cntx_blkszs_buf( cntx );
bszid_t* bmults = bli_cntx_bmults_buf( cntx );
bszid_t bm_id = bmults[ bs_id ];
// A little hack to ensure we don't try to access a blocksize object
// using an uninitialized/garbage value in the bmults array (which
// may exist because that blocksize in the context was never set).
if ( bm_id < BLIS_BSZID_LO && BLIS_BSZID_HI < bm_id ) return 0;
blksz_t* bmult = &blkszs[ bm_id ];
return bli_blksz_get_def( dt, bmult );
#endif
}
#endif
func_t* bli_cntx_get_l3_ukr( l3ukr_t ukr_id,
cntx_t* cntx )
@@ -240,6 +187,7 @@ func_t* bli_cntx_get_l3_ukr( l3ukr_t ukr_id,
return l3_ukr;
}
#if 0
void* bli_cntx_get_l3_ukr_dt( num_t dt,
l3ukr_t ukr_id,
cntx_t* cntx )
@@ -260,6 +208,7 @@ void* bli_cntx_get_l3_ukr_dt( num_t dt,
return bli_func_get_dt( dt, l3_ukr );
}
#endif
func_t* bli_cntx_get_l3_vir_ukr( l3ukr_t ukr_id,
cntx_t* cntx )
@@ -272,6 +221,7 @@ func_t* bli_cntx_get_l3_vir_ukr( l3ukr_t ukr_id,
return l3_vir_ukr;
}
#if 0
void* bli_cntx_get_l3_vir_ukr_dt( num_t dt,
l3ukr_t ukr_id,
cntx_t* cntx )
@@ -283,6 +233,7 @@ void* bli_cntx_get_l3_vir_ukr_dt( num_t dt,
// identified by ukr_id.
return bli_func_get_dt( dt, l3_vir_ukr );
}
#endif
func_t* bli_cntx_get_l3_nat_ukr( l3ukr_t ukr_id,
cntx_t* cntx )
@@ -295,6 +246,7 @@ func_t* bli_cntx_get_l3_nat_ukr( l3ukr_t ukr_id,
return l3_nat_ukr;
}
#if 0
void* bli_cntx_get_l3_nat_ukr_dt( num_t dt,
l3ukr_t ukr_id,
cntx_t* cntx )
@@ -306,6 +258,7 @@ void* bli_cntx_get_l3_nat_ukr_dt( num_t dt,
// identified by ukr_id.
return bli_func_get_dt( dt, l3_nat_ukr );
}
#endif
func_t* bli_cntx_get_l1f_ker( l1fkr_t ker_id,
cntx_t* cntx )
@@ -318,6 +271,7 @@ func_t* bli_cntx_get_l1f_ker( l1fkr_t ker_id,
return l1f_ker;
}
#if 0
void* bli_cntx_get_l1f_ker_dt( num_t dt,
l1fkr_t ker_id,
cntx_t* cntx )
@@ -327,6 +281,7 @@ void* bli_cntx_get_l1f_ker_dt( num_t dt,
return bli_func_get_dt( dt, l1f_ker );
}
#endif
func_t* bli_cntx_get_l1v_ker( l1vkr_t ker_id,
cntx_t* cntx )
@@ -339,6 +294,7 @@ func_t* bli_cntx_get_l1v_ker( l1vkr_t ker_id,
return l1v_ker;
}
#if 0
void* bli_cntx_get_l1v_ker_dt( num_t dt,
l1vkr_t ker_id,
cntx_t* cntx )
@@ -348,6 +304,7 @@ void* bli_cntx_get_l1v_ker_dt( num_t dt,
return bli_func_get_dt( dt, l1v_ker );
}
#endif
mbool_t* bli_cntx_get_l3_nat_ukr_prefs( l3ukr_t ukr_id,
cntx_t* cntx )
@@ -367,6 +324,7 @@ func_t* bli_cntx_get_packm_ukr( cntx_t* cntx )
return packm_ukrs;
}
#if 0
ind_t bli_cntx_get_ind_method( cntx_t* cntx )
{
return bli_cntx_method( cntx );
@@ -381,6 +339,7 @@ pack_t bli_cntx_get_pack_schema_b( cntx_t* cntx )
{
return bli_cntx_schema_b( cntx );
}
#endif
// -----------------------------------------------------------------------------

View File

@@ -66,55 +66,55 @@ typedef struct cntx_s
#define bli_cntx_blkszs_buf( cntx ) \
\
( cntx->blkszs )
( (cntx)->blkszs )
#define bli_cntx_bmults_buf( cntx ) \
\
( cntx->bmults )
( (cntx)->bmults )
#define bli_cntx_l3_vir_ukrs_buf( cntx ) \
\
( cntx->l3_vir_ukrs )
( (cntx)->l3_vir_ukrs )
#define bli_cntx_l3_nat_ukrs_buf( cntx ) \
\
( cntx->l3_nat_ukrs )
( (cntx)->l3_nat_ukrs )
#define bli_cntx_l3_nat_ukrs_prefs_buf( cntx ) \
\
( cntx->l3_nat_ukrs_prefs )
( (cntx)->l3_nat_ukrs_prefs )
#define bli_cntx_l1f_kers_buf( cntx ) \
\
( cntx->l1f_kers )
( (cntx)->l1f_kers )
#define bli_cntx_l1v_kers_buf( cntx ) \
\
( cntx->l1v_kers )
( (cntx)->l1v_kers )
#define bli_cntx_packm_ukrs_buf( cntx ) \
\
(&(cntx->packm_ukrs) )
(&((cntx)->packm_ukrs) )
#define bli_cntx_packm_ukrs( cntx ) \
\
(&(cntx->packm_ukrs) )
(&((cntx)->packm_ukrs) )
#define bli_cntx_method( cntx ) \
\
( cntx->method )
( (cntx)->method )
#define bli_cntx_schema_a( cntx ) \
\
( cntx->schema_a )
( (cntx)->schema_a )
#define bli_cntx_schema_b( cntx ) \
\
( cntx->schema_b )
( (cntx)->schema_b )
#define bli_cntx_schema_c( cntx ) \
\
( cntx->schema_c )
( (cntx)->schema_c )
// cntx_t modification (fields only)
@@ -178,13 +178,99 @@ typedef struct cntx_s
(cntx_p)->schema_c = _schema_c; \
}
// cntx_t query (complex)
//
// The macros in this group combine the raw field-access macros with the
// blksz_t/func_t/mbool_t accessors so that a datatype-specific value can be
// read from a context in a single expression. They are plain textual
// macros: no argument is validated, and in some of them (noted below) the
// cntx argument is expanded more than once, so avoid passing expressions
// with side effects.
// NOTE(review): pack schema queries are provided for a and b only; no
// bli_cntx_get_pack_schema_c() is defined in this group -- confirm that this
// is intentional.

// Expands to bli_blksz_get_def() applied to datatype dt and the address of
// element bs_id of the context's blkszs array.
#define bli_cntx_get_blksz_def_dt( dt, bs_id, cntx ) \
\
bli_blksz_get_def \
( \
(dt), (&(bli_cntx_blkszs_buf( (cntx) ))[ bs_id ]) \
)
// Same lookup as bli_cntx_get_blksz_def_dt(), but the blocksize object is
// passed to bli_blksz_get_max() instead of bli_blksz_get_def().
#define bli_cntx_get_blksz_max_dt( dt, bs_id, cntx ) \
\
bli_blksz_get_max \
( \
(dt), (&(bli_cntx_blkszs_buf( (cntx) ))[ bs_id ]) \
)
// Two-step lookup: element bs_id of the context's bmults array is read and
// used as the index into the blkszs array; the resulting blocksize object is
// then passed to bli_blksz_get_def() for datatype dt. The value read from
// bmults is used as an index without any range check. cntx is expanded
// twice.
#define bli_cntx_get_bmult_dt( dt, bs_id, cntx ) \
\
bli_blksz_get_def \
( \
(dt), \
(&(bli_cntx_blkszs_buf( (cntx) )) \
[ \
(bli_cntx_bmults_buf( (cntx) ))[ bs_id ] \
]) \
)
// Selects the context's l3_vir_ukrs array when the context's method field is
// not BLIS_NAT, and the l3_nat_ukrs array otherwise; the address of element
// ukr_id of the selected array is passed to bli_func_get_dt() for datatype
// dt. cntx appears three times in the expansion.
#define bli_cntx_get_l3_ukr_dt( dt, ukr_id, cntx ) \
\
bli_func_get_dt \
( \
(dt), \
&(( \
bli_cntx_method( (cntx) ) != BLIS_NAT \
? bli_cntx_l3_vir_ukrs_buf( (cntx) ) \
: bli_cntx_l3_nat_ukrs_buf( (cntx) ) \
)[ ukr_id ]) \
)
// Expands to bli_func_get_dt() applied to datatype dt and the address of
// element ukr_id of the context's l3_vir_ukrs array, regardless of the
// context's method field.
#define bli_cntx_get_l3_vir_ukr_dt( dt, ukr_id, cntx ) \
\
bli_func_get_dt \
( \
(dt), (&(bli_cntx_l3_vir_ukrs_buf( (cntx) ))[ ukr_id ]) \
)
// Expands to bli_func_get_dt() applied to datatype dt and the address of
// element ukr_id of the context's l3_nat_ukrs array, regardless of the
// context's method field.
#define bli_cntx_get_l3_nat_ukr_dt( dt, ukr_id, cntx ) \
\
bli_func_get_dt \
( \
(dt), (&(bli_cntx_l3_nat_ukrs_buf( (cntx) ))[ ukr_id ]) \
)
// Expands to bli_func_get_dt() applied to datatype dt and the address of
// element ker_id of the context's l1f_kers array.
#define bli_cntx_get_l1f_ker_dt( dt, ker_id, cntx ) \
\
bli_func_get_dt \
( \
(dt), (&(bli_cntx_l1f_kers_buf( (cntx) ))[ ker_id ]) \
)
// Expands to bli_func_get_dt() applied to datatype dt and the address of
// element ker_id of the context's l1v_kers array.
#define bli_cntx_get_l1v_ker_dt( dt, ker_id, cntx ) \
\
bli_func_get_dt \
( \
(dt), (&(bli_cntx_l1v_kers_buf( (cntx) ))[ ker_id ]) \
)
// Expands to bli_mbool_get_dt() applied to datatype dt and the address of
// element ukr_id of the context's l3_nat_ukrs_prefs array.
#define bli_cntx_get_l3_nat_ukr_prefs_dt( dt, ukr_id, cntx ) \
\
bli_mbool_get_dt \
( \
(dt), (&(bli_cntx_l3_nat_ukrs_prefs_buf( (cntx) ))[ ukr_id ]) \
)
// Alias for bli_cntx_method(): forwards cntx unchanged.
#define bli_cntx_get_ind_method( cntx ) \
\
bli_cntx_method( cntx )
// Alias for bli_cntx_schema_a(): forwards cntx unchanged.
#define bli_cntx_get_pack_schema_a( cntx ) \
\
bli_cntx_schema_a( cntx )
// Alias for bli_cntx_schema_b(): forwards cntx unchanged.
#define bli_cntx_get_pack_schema_b( cntx ) \
\
bli_cntx_schema_b( cntx )
// -----------------------------------------------------------------------------
// create/free
//void bli_cntx_obj_create( cntx_t* cntx );
//void bli_cntx_obj_copy( cntx_t* src,
// cntx_t* dst );
//void bli_cntx_obj_free( cntx_t* cntx );
void bli_cntx_obj_clear( cntx_t* cntx );
void bli_cntx_init( cntx_t* cntx );
@@ -193,49 +279,53 @@ void bli_cntx_init( cntx_t* cntx );
blksz_t* bli_cntx_get_blksz( bszid_t bs_id,
cntx_t* cntx );
dim_t bli_cntx_get_blksz_def_dt( num_t dt,
bszid_t bs_id,
cntx_t* cntx );
dim_t bli_cntx_get_blksz_max_dt( num_t dt,
bszid_t bs_id,
cntx_t* cntx );
blksz_t* bli_cntx_get_bmult( bszid_t bs_id,
cntx_t* cntx );
dim_t bli_cntx_get_bmult_dt( num_t dt,
bszid_t bs_id,
cntx_t* cntx );
func_t* bli_cntx_get_l3_ukr( l3ukr_t ukr_id,
cntx_t* cntx );
void* bli_cntx_get_l3_ukr_dt( num_t dt,
l3ukr_t ukr_id,
cntx_t* cntx );
func_t* bli_cntx_get_l3_vir_ukr( l3ukr_t ukr_id,
cntx_t* cntx );
void* bli_cntx_get_l3_vir_ukr_dt( num_t dt,
l3ukr_t ukr_id,
cntx_t* cntx );
func_t* bli_cntx_get_l3_nat_ukr( l3ukr_t ukr_id,
cntx_t* cntx );
void* bli_cntx_get_l3_nat_ukr_dt( num_t dt,
l3ukr_t ukr_id,
cntx_t* cntx );
mbool_t* bli_cntx_get_l3_nat_ukr_prefs( l3ukr_t ukr_id,
cntx_t* cntx );
func_t* bli_cntx_get_l1f_ker( l1fkr_t ker_id,
cntx_t* cntx );
void* bli_cntx_get_l1f_ker_dt( num_t dt,
l1fkr_t ker_id,
cntx_t* cntx );
func_t* bli_cntx_get_l1v_ker( l1vkr_t ker_id,
cntx_t* cntx );
void* bli_cntx_get_l1v_ker_dt( num_t dt,
l1vkr_t ker_id,
cntx_t* cntx );
func_t* bli_cntx_get_packm_ukr( cntx_t* cntx );
ind_t bli_cntx_get_ind_method( cntx_t* cntx );
pack_t bli_cntx_get_pack_schema_a( cntx_t* cntx );
pack_t bli_cntx_get_pack_schema_b( cntx_t* cntx );
pack_t bli_cntx_get_pack_schema_c( cntx_t* cntx );
//dim_t bli_cntx_get_blksz_def_dt( num_t dt,
// bszid_t bs_id,
// cntx_t* cntx );
//dim_t bli_cntx_get_blksz_max_dt( num_t dt,
// bszid_t bs_id,
// cntx_t* cntx );
//dim_t bli_cntx_get_bmult_dt( num_t dt,
// bszid_t bs_id,
// cntx_t* cntx );
//void* bli_cntx_get_l3_ukr_dt( num_t dt,
// l3ukr_t ukr_id,
// cntx_t* cntx );
//void* bli_cntx_get_l3_vir_ukr_dt( num_t dt,
// l3ukr_t ukr_id,
// cntx_t* cntx );
//void* bli_cntx_get_l3_nat_ukr_dt( num_t dt,
// l3ukr_t ukr_id,
// cntx_t* cntx );
//bool_t bli_cntx_get_l3_nat_ukr_prefs_dt( num_t dt,
// l3ukr_t ukr_id,
// cntx_t* cntx );
//void* bli_cntx_get_l1f_ker_dt( num_t dt,
// l1fkr_t ker_id,
// cntx_t* cntx );
//void* bli_cntx_get_l1v_ker_dt( num_t dt,
// l1vkr_t ker_id,
// cntx_t* cntx );
//ind_t bli_cntx_get_ind_method( cntx_t* cntx );
//pack_t bli_cntx_get_pack_schema_a( cntx_t* cntx );
//pack_t bli_cntx_get_pack_schema_b( cntx_t* cntx );
//pack_t bli_cntx_get_pack_schema_c( cntx_t* cntx );
// set functions
@@ -294,10 +384,17 @@ void bli_cntx_print( cntx_t* cntx );
// Preprocess out these calls entirely, since they are currently just empty
// functions that do nothing.
//#define bli_cntx_obj_create( cntx ) { bli_cntx_obj_clear( cntx ); }
//#define bli_cntx_obj_free( cntx ) { bli_cntx_obj_clear( cntx ); }
#define bli_cntx_obj_create( cntx ) { ; }
#define bli_cntx_obj_free( cntx ) { ; }
#if 0
#define bli_cntx_obj_create( cntx ) { bli_cntx_obj_clear( cntx ); }
#define bli_cntx_obj_free( cntx ) { bli_cntx_obj_clear( cntx ); }
#else
#define bli_cntx_obj_create( cntx ) { ; }
#define bli_cntx_obj_free( cntx ) { ; }
#endif
// These macros initialize/finalize a local context if the given context
// pointer is NULL. When initializing, the context address that should
// be used (local or external) is assigned to cntx_p.
#define bli_cntx_init_local_if( opname, cntx, cntx_p ) \
\

View File

@@ -34,13 +34,13 @@
#include "blis.h"
obj_t BLIS_TWO;
obj_t BLIS_ONE;
obj_t BLIS_ONE_HALF;
obj_t BLIS_ZERO;
obj_t BLIS_MINUS_ONE_HALF;
obj_t BLIS_MINUS_ONE;
obj_t BLIS_MINUS_TWO;
obj_t BLIS_TWO = {};
obj_t BLIS_ONE = {};
obj_t BLIS_ONE_HALF = {};
obj_t BLIS_ZERO = {};
obj_t BLIS_MINUS_ONE_HALF = {};
obj_t BLIS_MINUS_ONE = {};
obj_t BLIS_MINUS_TWO = {};
static bool_t bli_const_is_init = FALSE;

View File

@@ -35,11 +35,11 @@
#include "blis.h"
char *bli_optarg;
char *bli_optarg = NULL;
int bli_optind = 1;
int bli_opterr = 0;
int bli_optopt;
int bli_optopt = 0;
static char OPT_MARKER = '-';

View File

@@ -754,6 +754,9 @@ static func_t bli_gks_l1v_kers[BLIS_NUM_LEVEL1V_KERS] =
/* addv */ { { BLIS_SADDV_KERNEL, BLIS_CADDV_KERNEL,
BLIS_DADDV_KERNEL, BLIS_ZADDV_KERNEL, }
},
/* axpbyv */ { { BLIS_SAXPBYV_KERNEL, BLIS_CAXPBYV_KERNEL,
BLIS_DAXPBYV_KERNEL, BLIS_ZAXPBYV_KERNEL, }
},
/* axpyv */ { { BLIS_SAXPYV_KERNEL, BLIS_CAXPYV_KERNEL,
BLIS_DAXPYV_KERNEL, BLIS_ZAXPYV_KERNEL, }
},
@@ -784,6 +787,9 @@ static func_t bli_gks_l1v_kers[BLIS_NUM_LEVEL1V_KERS] =
/* swapv */ { { BLIS_SSWAPV_KERNEL, BLIS_CSWAPV_KERNEL,
BLIS_DSWAPV_KERNEL, BLIS_ZSWAPV_KERNEL, }
},
/* xpbyv */ { { BLIS_SXPBYV_KERNEL, BLIS_CXPBYV_KERNEL,
BLIS_DXPBYV_KERNEL, BLIS_ZXPBYV_KERNEL, }
},
};
static func_t bli_gks_l1v_ref_kers[BLIS_NUM_LEVEL1V_KERS] =
@@ -792,6 +798,9 @@ static func_t bli_gks_l1v_ref_kers[BLIS_NUM_LEVEL1V_KERS] =
/* addv */ { { BLIS_SADDV_KERNEL_REF, BLIS_CADDV_KERNEL_REF,
BLIS_DADDV_KERNEL_REF, BLIS_ZADDV_KERNEL_REF, }
},
/* axpbyv */ { { BLIS_SAXPBYV_KERNEL_REF, BLIS_CAXPBYV_KERNEL_REF,
BLIS_DAXPBYV_KERNEL_REF, BLIS_ZAXPBYV_KERNEL_REF, }
},
/* axpyv */ { { BLIS_SAXPYV_KERNEL_REF, BLIS_CAXPYV_KERNEL_REF,
BLIS_DAXPYV_KERNEL_REF, BLIS_ZAXPYV_KERNEL_REF, }
},
@@ -822,6 +831,9 @@ static func_t bli_gks_l1v_ref_kers[BLIS_NUM_LEVEL1V_KERS] =
/* swapv */ { { BLIS_SSWAPV_KERNEL_REF, BLIS_CSWAPV_KERNEL_REF,
BLIS_DSWAPV_KERNEL_REF, BLIS_ZSWAPV_KERNEL_REF, }
},
/* xpbyv */ { { BLIS_SXPBYV_KERNEL_REF, BLIS_CXPBYV_KERNEL_REF,
BLIS_DXPBYV_KERNEL_REF, BLIS_ZXPBYV_KERNEL_REF, }
},
};
// -----------------------------------------------------------------------------

View File

@@ -36,10 +36,10 @@
static bool_t bli_thread_is_init = FALSE;
packm_thrinfo_t BLIS_PACKM_SINGLE_THREADED;
gemm_thrinfo_t BLIS_GEMM_SINGLE_THREADED;
herk_thrinfo_t BLIS_HERK_SINGLE_THREADED;
thread_comm_t BLIS_SINGLE_COMM;
packm_thrinfo_t BLIS_PACKM_SINGLE_THREADED = {};
gemm_thrinfo_t BLIS_GEMM_SINGLE_THREADED = {};
herk_thrinfo_t BLIS_HERK_SINGLE_THREADED = {};
thread_comm_t BLIS_SINGLE_COMM = {};
void bli_thread_init( void )
{

View File

@@ -43,8 +43,8 @@
\
f77_int PASTEF772(i,chx,blasname) \
( \
f77_int* n, \
ftype_x* x, f77_int* incx \
const f77_int* n, \
const ftype_x* x, const f77_int* incx \
) \
{ \
dim_t n0; \
@@ -68,7 +68,7 @@ f77_int PASTEF772(i,chx,blasname) \
\
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */ \
bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \
bli_convert_blas_incv( n0, (ftype_x*)x, *incx, x0, incx0 ); \
\
/* Call BLIS interface. */ \
PASTEMAC(chx,blisname) \

View File

@@ -41,8 +41,8 @@
\
f77_int PASTEF772(i,chx,blasname) \
( \
f77_int* n, \
ftype_x* x, f77_int* incx \
const f77_int* n, \
const ftype_x* x, const f77_int* incx \
);
#ifdef BLIS_ENABLE_BLAS2BLIS

View File

@@ -43,8 +43,8 @@
\
ftype_r PASTEF772(chr,chx,blasname) \
( \
f77_int* n, \
ftype_x* x, f77_int* incx \
const f77_int* n, \
const ftype_x* x, const f77_int* incx \
) \
{ \
dim_t n0; \
@@ -61,7 +61,7 @@ ftype_r PASTEF772(chr,chx,blasname) \
\
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */ \
bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \
bli_convert_blas_incv( n0, (ftype_x*)x, *incx, x0, incx0 ); \
\
/* Call BLIS interface. */ \
PASTEMAC(chx,blisname) \

View File

@@ -41,8 +41,8 @@
\
ftype_r PASTEF772(chr,chx,blasname) \
( \
f77_int* n, \
ftype_x* x, f77_int* incx \
const f77_int* n, \
const ftype_x* x, const f77_int* incx \
);
#ifdef BLIS_ENABLE_BLAS2BLIS

View File

@@ -43,10 +43,10 @@
\
void PASTEF77(ch,blasname) \
( \
f77_int* n, \
ftype* alpha, \
ftype* x, f77_int* incx, \
ftype* y, f77_int* incy \
const f77_int* n, \
const ftype* alpha, \
const ftype* x, const f77_int* incx, \
ftype* y, const f77_int* incy \
) \
{ \
dim_t n0; \
@@ -64,15 +64,15 @@ void PASTEF77(ch,blasname) \
\
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */ \
bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \
bli_convert_blas_incv( n0, y, *incy, y0, incy0 ); \
bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
\
/* Call BLIS interface. */ \
PASTEMAC(ch,blisname) \
( \
BLIS_NO_CONJUGATE, \
n0, \
alpha, \
(ftype*)alpha, \
x0, incx0, \
y0, incy0, \
NULL \

View File

@@ -41,10 +41,10 @@
\
void PASTEF77(ch,blasname) \
( \
f77_int* n, \
ftype* alpha, \
ftype* x, f77_int* incx, \
ftype* y, f77_int* incy \
const f77_int* n, \
const ftype* alpha, \
const ftype* x, const f77_int* incx, \
ftype* y, const f77_int* incy \
);
#ifdef BLIS_ENABLE_BLAS2BLIS

View File

@@ -43,9 +43,9 @@
\
void PASTEF77(ch,blasname) \
( \
f77_int* n, \
ftype* x, f77_int* incx, \
ftype* y, f77_int* incy \
const f77_int* n, \
const ftype* x, const f77_int* incx, \
ftype* y, const f77_int* incy \
) \
{ \
dim_t n0; \
@@ -63,8 +63,8 @@ void PASTEF77(ch,blasname) \
\
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */ \
bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \
bli_convert_blas_incv( n0, y, *incy, y0, incy0 ); \
bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
\
/* Call BLIS interface. */ \
PASTEMAC(ch,blisname) \

View File

@@ -41,9 +41,9 @@
\
void PASTEF77(ch,blasname) \
( \
f77_int* n, \
ftype* x, f77_int* incx, \
ftype* y, f77_int* incy \
const f77_int* n, \
const ftype* x, const f77_int* incx, \
ftype* y, const f77_int* incy \
);
#ifdef BLIS_ENABLE_BLAS2BLIS

View File

@@ -43,9 +43,9 @@
\
ftype PASTEF772(chxy,blasname,chc) \
( \
f77_int* n, \
ftype* x, f77_int* incx, \
ftype* y, f77_int* incy \
const f77_int* n, \
const ftype* x, const f77_int* incx, \
const ftype* y, const f77_int* incy \
) \
{ \
dim_t n0; \
@@ -64,8 +64,8 @@ ftype PASTEF772(chxy,blasname,chc) \
\
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */ \
bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \
bli_convert_blas_incv( n0, y, *incy, y0, incy0 ); \
bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
\
/* Call BLIS interface. */ \
PASTEMAC(chxy,blisname) \
@@ -95,9 +95,9 @@ INSERT_GENTFUNCDOT_BLAS( dot, dotv )
// with result returned in single precision.
float PASTEF77(sd,sdot)
(
f77_int* n,
float* x, f77_int* incx,
float* y, f77_int* incy
const f77_int* n,
const float* x, const f77_int* incx,
const float* y, const f77_int* incy
)
{
return ( float )PASTEF77(d,sdot)( n,
@@ -109,9 +109,9 @@ float PASTEF77(sd,sdot)
// with result returned in double precision.
double PASTEF77(d,sdot)
(
f77_int* n,
float* x, f77_int* incx,
float* y, f77_int* incy
const f77_int* n,
const float* x, const f77_int* incx,
const float* y, const f77_int* incy
)
{
dim_t n0;
@@ -129,8 +129,8 @@ double PASTEF77(d,sdot)
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */
bli_convert_blas_incv( n0, x, *incx, x0, incx0 );
bli_convert_blas_incv( n0, y, *incy, y0, incy0 );
bli_convert_blas_incv( n0, (float*)x, *incx, x0, incx0 );
bli_convert_blas_incv( n0, (float*)y, *incy, y0, incy0 );
rho = 0.0;

View File

@@ -41,9 +41,9 @@
\
ftype PASTEF772(chxy,blasname,chc) \
( \
f77_int* n, \
ftype* x, f77_int* incx, \
ftype* y, f77_int* incy \
const f77_int* n, \
const ftype* x, const f77_int* incx, \
const ftype* y, const f77_int* incy \
);
#ifdef BLIS_ENABLE_BLAS2BLIS
@@ -54,15 +54,15 @@ INSERT_GENTPROTDOT_BLAS( dot )
float PASTEF77(sd,sdot)
(
f77_int* n,
float* x, f77_int* incx,
float* y, f77_int* incy
const f77_int* n,
const float* x, const f77_int* incx,
const float* y, const f77_int* incy
);
double PASTEF77(d,sdot)
(
f77_int* n,
float* x, f77_int* incx,
float* y, f77_int* incy
const f77_int* n,
const float* x, const f77_int* incx,
const float* y, const f77_int* incy
);
#endif

View File

@@ -43,16 +43,16 @@
\
void PASTEF77(ch,blasname) \
( \
f77_char* transa, \
f77_char* transb, \
f77_int* m, \
f77_int* n, \
f77_int* k, \
ftype* alpha, \
ftype* a, f77_int* lda, \
ftype* b, f77_int* ldb, \
ftype* beta, \
ftype* c, f77_int* ldc \
const f77_char* transa, \
const f77_char* transb, \
const f77_int* m, \
const f77_int* n, \
const f77_int* k, \
const ftype* alpha, \
const ftype* a, const f77_int* lda, \
const ftype* b, const f77_int* ldb, \
const ftype* beta, \
ftype* c, const f77_int* ldc \
) \
{ \
trans_t blis_transa; \
@@ -106,11 +106,11 @@ void PASTEF77(ch,blasname) \
m0, \
n0, \
k0, \
alpha, \
a, rs_a, cs_a, \
b, rs_b, cs_b, \
beta, \
c, rs_c, cs_c, \
(ftype*)alpha, \
(ftype*)a, rs_a, cs_a, \
(ftype*)b, rs_b, cs_b, \
(ftype*)beta, \
(ftype*)c, rs_c, cs_c, \
NULL \
); \
\

View File

@@ -41,16 +41,16 @@
\
void PASTEF77(ch,blasname) \
( \
f77_char* transa, \
f77_char* transb, \
f77_int* m, \
f77_int* n, \
f77_int* k, \
ftype* alpha, \
ftype* a, f77_int* lda, \
ftype* b, f77_int* ldb, \
ftype* beta, \
ftype* c, f77_int* ldc \
const f77_char* transa, \
const f77_char* transb, \
const f77_int* m, \
const f77_int* n, \
const f77_int* k, \
const ftype* alpha, \
const ftype* a, const f77_int* lda, \
const ftype* b, const f77_int* ldb, \
const ftype* beta, \
ftype* c, const f77_int* ldc \
);
#ifdef BLIS_ENABLE_BLAS2BLIS

Some files were not shown because too many files have changed in this diff Show More