mirror of
https://github.com/amd/blis.git
synced 2026-07-01 03:37:27 +00:00
Merge branch 'master' into dev
This commit is contained in:
@@ -10,7 +10,7 @@ install:
|
||||
- call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
|
||||
- conda update --yes --quiet conda
|
||||
- conda config --add channels conda-forge
|
||||
- conda install --yes clangdev posix m2-make pthreads-win32
|
||||
- conda install --yes clangdev posix m2-make
|
||||
- call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64
|
||||
- set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%"
|
||||
- set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%"
|
||||
@@ -22,10 +22,10 @@ build_script:
|
||||
- set RANLIB=echo
|
||||
- set AR=llvm-ar
|
||||
- set AS=llvm-as
|
||||
- set LIBPTHREAD=-lpthreads
|
||||
- set LIBPTHREAD=
|
||||
- set "PATH=%PATH%;C:\projects\blis\lib\%CONFIG%"
|
||||
- set "CFLAGS=-Wno-macro-redefined"
|
||||
- bash -lc "source activate && cd /c/projects/blis && ./configure %CONFIGURE_OPTS% --enable-cblas --enable-threading=pthreads --enable-arg-max-hack --prefix=/c/blis %CONFIG%"
|
||||
- bash -lc "source activate && cd /c/projects/blis && ./configure %CONFIGURE_OPTS% --enable-cblas --disable-threading --enable-arg-max-hack --prefix=/c/blis %CONFIG%"
|
||||
- bash -lc "source activate && cd /c/projects/blis && make -j4 V=1"
|
||||
- bash -lc "source activate && cd /c/projects/blis && make install"
|
||||
- ps: Compress-Archive -Path C:\blis -DestinationPath C:\blis.zip
|
||||
|
||||
@@ -33,11 +33,7 @@
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <pthread.h>
|
||||
|
||||
#include "bli_system.h"
|
||||
#include "bli_type_defs.h"
|
||||
#include "bli_arch.h"
|
||||
#include "bli_cpuid.h"
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
85
build/regen-symbols.sh
Executable file
85
build/regen-symbols.sh
Executable file
@@ -0,0 +1,85 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# BLIS
|
||||
# An object-based framework for developing high-performance BLAS-like
|
||||
# libraries.
|
||||
#
|
||||
# Copyright (C) 2018, The University of Texas at Austin
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# - Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# - Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# - Neither the name of copyright holder(s) nor the names
|
||||
# of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#
|
||||
|
||||
#
|
||||
# This script regenerates a list of symbols for use when building
|
||||
# Windows-compatible DLLs. We assume that this script will be run after
|
||||
# running configure as:
|
||||
#
|
||||
# ./configure --enable-cblas haswell
|
||||
#
|
||||
# and compiling BLIS normally. (Notice that we also prune out all
|
||||
# haswell/zen-related context initialization and reference kernels.)
|
||||
#
|
||||
|
||||
# Input shared library to read symbols from, and the .def file to (re)write.
# Both paths are relative, so the script is expected to be run from the
# directory that contains lib/ and build/.
libblis='lib/haswell/libblis.so'
|
||||
symfile='build/libblis-symbols.def'
|
||||
|
||||
# Step 1: build one temporary def.* file per symbol family in the current
# directory. def.exports holds only the "EXPORTS" header line. Each nm
# pipeline keeps the " T <name>" portion of matching lines (grep -o) and
# then drops everything up to and including the first "T" (cut), leaving
# " <name>" with a leading space that is removed in step 2.
# The bracketed character classes restrict the second letter of the
# s/d/c/z/i-prefixed names that are picked up.
# The " D BLIS_" (def.blis_const) extraction is intentionally commented out.
echo "EXPORTS" > def.exports
|
||||
#nm -g ${libblis} | grep -o " D BLIS_.*" | cut -f2- "-dD" > def.blis_const
|
||||
nm -g ${libblis} | grep -o " T bli_.*" | cut -f2- "-dT" > def.blis
|
||||
nm -g ${libblis} | grep -o " T bla_.*" | cut -f2- "-dT" > def.blis_bla
|
||||
nm -g ${libblis} | grep -o " T cblas_.*" | cut -f2- "-dT" > def.blis_cblas
|
||||
nm -g ${libblis} | grep -o " T s[acdgnrst].*" | cut -f2- "-dT" > def.blas_s
|
||||
nm -g ${libblis} | grep -o " T d[acdgnrstz].*" | cut -f2- "-dT" > def.blas_d
|
||||
nm -g ${libblis} | grep -o " T c[acdghrst].*" | cut -f2- "-dT" > def.blas_c
|
||||
nm -g ${libblis} | grep -o " T z[acdghrst].*" | cut -f2- "-dT" > def.blas_z
|
||||
nm -g ${libblis} | grep -o " T i[cdsz].*" | cut -f2- "-dT" > def.blas_i
|
||||
|
||||
# Step 2: concatenate the pieces (header first), strip the leading space
# left by step 1 (the "EXPORTS" line has no space and passes through
# unchanged), filter out any line containing init_haswell, haswell_ref or
# zen_ref, and overwrite ${symfile} with the result.
cat def.exports \
|
||||
def.blis \
|
||||
def.blis_bla \
|
||||
def.blas_s \
|
||||
def.blas_d \
|
||||
def.blas_c \
|
||||
def.blas_z \
|
||||
def.blas_i \
|
||||
def.blis_cblas \
|
||||
| cut -f2- "-d " \
|
||||
| grep -v init_haswell \
|
||||
| grep -v haswell_ref \
|
||||
| grep -v zen_ref \
|
||||
> ${symfile}
|
||||
|
||||
# Step 3: remove the temporary def.* files created in step 1.
rm -f \
|
||||
def.exports \
|
||||
def.blis \
|
||||
def.blis_bla \
|
||||
def.blas_s \
|
||||
def.blas_d \
|
||||
def.blas_c \
|
||||
def.blas_z \
|
||||
def.blas_i \
|
||||
def.blis_cblas
|
||||
|
||||
@@ -497,9 +497,9 @@ SOFLAGS := -shared
|
||||
ifeq ($(IS_WIN),yes)
|
||||
# Windows shared library link flags.
|
||||
ifeq ($(CC_VENDOR),clang)
|
||||
SOFLAGS += -Wl,-def:windows/build/libblis-symbols.def -Wl,-implib:$(BASE_LIB_PATH)/$(LIBBLIS).lib
|
||||
SOFLAGS += -Wl,-def:build/libblis-symbols.def -Wl,-implib:$(BASE_LIB_PATH)/$(LIBBLIS).lib
|
||||
else
|
||||
SOFLAGS += windows/build/libblis-symbols.def -Wl,--out-implib,$(LIBBLIS).dll.a
|
||||
SOFLAGS += build/libblis-symbols.def -Wl,--out-implib,$(LIBBLIS).dll.a
|
||||
endif
|
||||
else
|
||||
# Linux shared library link flags.
|
||||
|
||||
547
configure
vendored
547
configure
vendored
@@ -266,6 +266,8 @@ print_usage()
|
||||
echo " AR Specifies the archiver to use."
|
||||
echo " CFLAGS Specifies additional compiler flags to use (prepended)."
|
||||
echo " LDFLAGS Specifies additional linker flags to use (prepended)."
|
||||
echo " LIBPTHREAD Pthreads library to use."
|
||||
echo " PYTHON Specifies the python interpreter to use."
|
||||
echo " "
|
||||
echo " Environment variables may also be specified as command line"
|
||||
echo " options, e.g.:"
|
||||
@@ -901,32 +903,32 @@ get_cxx_search_list()
|
||||
echo "${list}"
|
||||
}
|
||||
|
||||
select_cc()
|
||||
select_tool()
|
||||
{
|
||||
local search_list CC_env the_cc cc
|
||||
|
||||
# This is the list of compilers to search for, and the order in which
|
||||
# to search for them.
|
||||
# This is the list of compilers/tools to search for, and the order in
|
||||
# which to search for them.
|
||||
search_list=$1
|
||||
|
||||
# The environment variable associated with the compiler type we
|
||||
# are searching (e.g. CC, CXX).
|
||||
# The environment variable associated with the compiler/tool type we
|
||||
# are searching (e.g. CC, CXX, PYTHON).
|
||||
CC_env=$2
|
||||
|
||||
# If CC contains something, add it to the beginning of our default
|
||||
# If CC_env contains something, add it to the beginning of our default
|
||||
# search list.
|
||||
if [ -n "${CC_env}" ]; then
|
||||
search_list="${CC_env} ${search_list}"
|
||||
fi
|
||||
|
||||
# Initialize our selected compiler to empty.
|
||||
# Initialize our selected compiler/tool to empty.
|
||||
the_cc=""
|
||||
|
||||
# Try each compiler in the list and select the first one we find that
|
||||
# Try each compiler/tool in the list and select the first one we find that
|
||||
# works.
|
||||
for cc in ${search_list}; do
|
||||
|
||||
# See if the current compiler works and/or is present.
|
||||
# See if the current compiler/tool works and/or is present.
|
||||
${cc} --version > /dev/null 2>&1
|
||||
|
||||
if [ "$?" == 0 ]; then
|
||||
@@ -935,7 +937,7 @@ select_cc()
|
||||
fi
|
||||
done
|
||||
|
||||
# Return the selected compiler.
|
||||
# Return the selected compiler/tool.
|
||||
echo "${the_cc}"
|
||||
}
|
||||
|
||||
@@ -995,7 +997,9 @@ auto_detect()
|
||||
# Set the linker flags. We need pthreads because it is needed for
|
||||
# parts of bli_arch.c unrelated to bli_arch_string(), which is called
|
||||
# by the main() function in ${main_c}.
|
||||
ldflags="${LIBPTHREAD:--lpthread}"
|
||||
if [ $is_win = no ]; then
|
||||
ldflags="${LIBPTHREAD--lpthread}"
|
||||
fi
|
||||
|
||||
# Compile the auto-detect program using source code inside the
|
||||
# framework.
|
||||
@@ -1105,6 +1109,12 @@ echoerr_unsupportedcc()
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Report that the detected python version is unsupported and abort.
# Reads the variables script_name and python_version, passes the message to
# echoerr, and then exits the script with status 1 (does not return).
echoerr_unsupportedpython()
|
||||
{
|
||||
echoerr "${script_name}: *** Unsupported python version: ${python_version}."
|
||||
exit 1
|
||||
}
|
||||
|
||||
get_binutils_version()
|
||||
{
|
||||
binutil=${AS:-as}
|
||||
@@ -1137,6 +1147,76 @@ get_binutils_version()
|
||||
echo "${script_name}: found assembler ('as') version ${bu_version} (maj: ${bu_major}, min: ${bu_minor}, rev: ${bu_revision})."
|
||||
}
|
||||
|
||||
# Print (on stdout) the space-separated list of python interpreter names to
# try, in priority order. Takes no arguments; the list is currently fixed
# and does not branch on the operating system.
get_python_search_list()
|
||||
{
|
||||
local list
|
||||
|
||||
# For Linux, Darwin (OS X), and generic OSes, prioritize 'python'.
|
||||
list="python python3 python2"
|
||||
|
||||
echo "${list}"
|
||||
}
|
||||
|
||||
# Query the version of the interpreter named by ${found_python} and split it
# into components.
#
# Reads:  found_python, script_name
# Sets:   python_version, python_major, python_minor, python_revision
#         (these are NOT declared local, so they remain visible to the
#         caller after this function returns)
# Prints: a one-line summary of the detected version on stdout.
get_python_version()
|
||||
{
|
||||
local python vendor_string
|
||||
|
||||
python="${found_python}"
|
||||
|
||||
# Query the python version. This includes the version number along
|
||||
# with other text, such as "Python ".
|
||||
# NOTE: Python seems to echo its version info to stderr, not
|
||||
# stdout, and thus we redirect stderr to stdout and capture that.
|
||||
vendor_string="$(${python} --version 2>&1)"
|
||||
|
||||
# Drop any preceding text and save only the first numbers and what
|
||||
# comes after.
|
||||
python_version=$(echo "${vendor_string}" | sed -e "s/[a-zA-Z_ ]* \([0-9]*\..*\)/\1/g")
|
||||
# Parse the version number into its major, minor, and revision
|
||||
# components.
|
||||
python_major=$(echo "${python_version}" | cut -d. -f1)
|
||||
|
||||
python_minor=$(echo "${python_version}" | cut -d. -f2)
|
||||
|
||||
python_revision=$(echo "${python_version}" | cut -d. -f3)
|
||||
|
||||
echo "${script_name}: found python version ${python_version} (maj: ${python_major}, min: ${python_minor}, rev: ${python_revision})."
|
||||
}
|
||||
|
||||
# Verify that the detected python version is one the build supports.
#
# Reads:  python_major, python_minor, python_version, script_name,
#         found_python
# Effect: calls echoerr_unsupportedpython for any 1.x version, for 2.x with
#         a minor version below 7, and for 3.x with a minor version below 5;
#         otherwise prints a confirmation message on stdout.
# NOTE(review): the numeric tests use unquoted ${python_major} and
# ${python_minor}, so they assume both hold integers — confirm that the
# version variables are always populated before this is called.
check_python()
|
||||
{
|
||||
local python
|
||||
|
||||
# NOTE(review): 'python' is assigned here but not referenced again within
# this function.
python="${found_python}"
|
||||
|
||||
#
|
||||
# Python requirements
|
||||
#
|
||||
# python1: no versions supported
|
||||
# python2: 2.7+
|
||||
# python3: 3.5+
|
||||
#
|
||||
|
||||
# Python 1.x is unsupported.
|
||||
if [ ${python_major} -eq 1 ]; then
|
||||
echoerr_unsupportedpython
|
||||
fi
|
||||
|
||||
# Python 2.6.x or older is unsupported.
|
||||
if [ ${python_major} -eq 2 ]; then
|
||||
if [ ${python_minor} -lt 7 ]; then
|
||||
echoerr_unsupportedpython
|
||||
fi
|
||||
fi
|
||||
|
||||
# Python 3.4.x or older is unsupported.
|
||||
if [ ${python_major} -eq 3 ]; then
|
||||
if [ ${python_minor} -lt 5 ]; then
|
||||
echoerr_unsupportedpython
|
||||
fi
|
||||
fi
|
||||
|
||||
echo "${script_name}: python ${python_version} appears to be supported."
|
||||
}
|
||||
|
||||
get_compiler_version()
|
||||
{
|
||||
local cc vendor_string
|
||||
@@ -1676,224 +1756,254 @@ main()
|
||||
|
||||
# -- Command line option/argument parsing ----------------------------------
|
||||
|
||||
# Process our command line options.
|
||||
while getopts ":hp:d:s:t:r:qci:b:-:" opt; do
|
||||
case $opt in
|
||||
-)
|
||||
case "$OPTARG" in
|
||||
help)
|
||||
print_usage
|
||||
;;
|
||||
quiet)
|
||||
quiet_flag=1
|
||||
;;
|
||||
prefix=*)
|
||||
prefix_flag=1
|
||||
install_prefix_user=${OPTARG#*=}
|
||||
;;
|
||||
libdir=*)
|
||||
libdir_flag=1
|
||||
install_libdir_user=${OPTARG#*=}
|
||||
;;
|
||||
includedir=*)
|
||||
incdir_flag=1
|
||||
install_incdir_user=${OPTARG#*=}
|
||||
;;
|
||||
sharedir=*)
|
||||
sharedir_flag=1
|
||||
install_sharedir_user=${OPTARG#*=}
|
||||
;;
|
||||
enable-debug)
|
||||
debug_flag=1
|
||||
debug_type=noopt
|
||||
;;
|
||||
enable-debug=*)
|
||||
debug_flag=1
|
||||
debug_type=${OPTARG#*=}
|
||||
;;
|
||||
disable-debug)
|
||||
debug_flag=0
|
||||
;;
|
||||
enable-verbose-make)
|
||||
enable_verbose='yes'
|
||||
;;
|
||||
disable-verbose-make)
|
||||
enable_verbose='no'
|
||||
;;
|
||||
enable-arg-max-hack)
|
||||
enable_arg_max_hack='yes'
|
||||
;;
|
||||
disable-arg-max-hack)
|
||||
enable_arg_max_hack='no'
|
||||
;;
|
||||
enable-static)
|
||||
enable_static='yes'
|
||||
;;
|
||||
disable-static)
|
||||
enable_static='no'
|
||||
;;
|
||||
enable-shared)
|
||||
enable_shared='yes'
|
||||
;;
|
||||
disable-shared)
|
||||
enable_shared='no'
|
||||
;;
|
||||
enable-threading=*)
|
||||
threading_model=${OPTARG#*=}
|
||||
;;
|
||||
thread-part-jrir=*)
|
||||
thread_part_jrir=${OPTARG#*=}
|
||||
;;
|
||||
disable-threading)
|
||||
threading_model='no'
|
||||
;;
|
||||
enable-packbuf-pools)
|
||||
enable_packbuf_pools='yes'
|
||||
;;
|
||||
disable-packbuf-pools)
|
||||
enable_packbuf_pools='no'
|
||||
;;
|
||||
enable-sandbox=*)
|
||||
sandbox_flag=1
|
||||
sandbox=${OPTARG#*=}
|
||||
;;
|
||||
disable-sandbox)
|
||||
sandbox_flag=0
|
||||
;;
|
||||
int-size=*)
|
||||
int_type_size=${OPTARG#*=}
|
||||
;;
|
||||
blas-int-size=*)
|
||||
blas_int_type_size=${OPTARG#*=}
|
||||
;;
|
||||
enable-blas)
|
||||
enable_blas='yes'
|
||||
;;
|
||||
disable-blas)
|
||||
enable_blas='no'
|
||||
;;
|
||||
enable-cblas)
|
||||
enable_cblas='yes'
|
||||
;;
|
||||
disable-cblas)
|
||||
enable_cblas='no'
|
||||
;;
|
||||
enable-mixed-dt)
|
||||
enable_mixed_dt='yes'
|
||||
;;
|
||||
disable-mixed-dt)
|
||||
enable_mixed_dt='no'
|
||||
;;
|
||||
enable-mixed-dt-extra-mem)
|
||||
enable_mixed_dt_extra_mem='yes'
|
||||
;;
|
||||
disable-mixed-dt-extra-mem)
|
||||
enable_mixed_dt_extra_mem='no'
|
||||
;;
|
||||
with-memkind)
|
||||
enable_memkind='yes'
|
||||
;;
|
||||
without-memkind)
|
||||
enable_memkind='no'
|
||||
;;
|
||||
force-version=*)
|
||||
force_version=${OPTARG#*=}
|
||||
;;
|
||||
show-config-list)
|
||||
show_config_list=1
|
||||
;;
|
||||
*)
|
||||
print_usage
|
||||
;;
|
||||
esac;;
|
||||
h)
|
||||
print_usage
|
||||
;;
|
||||
p)
|
||||
prefix_flag=1
|
||||
install_prefix_user=$OPTARG
|
||||
;;
|
||||
d)
|
||||
debug_flag=1
|
||||
debug_type=$OPTARG
|
||||
;;
|
||||
s)
|
||||
sandbox_flag=1
|
||||
sandbox=$OPTARG
|
||||
;;
|
||||
q)
|
||||
quiet_flag=1
|
||||
;;
|
||||
t)
|
||||
threading_model=$OPTARG
|
||||
;;
|
||||
r)
|
||||
thread_part_jrir=$OPTARG
|
||||
;;
|
||||
i)
|
||||
int_type_size=$OPTARG
|
||||
;;
|
||||
b)
|
||||
blas_int_type_size=$OPTARG
|
||||
;;
|
||||
c)
|
||||
show_config_list=1
|
||||
;;
|
||||
\?)
|
||||
print_usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
shift $(($OPTIND - 1))
|
||||
found=true
|
||||
while $found = true; do
|
||||
|
||||
# Parse environment variables
|
||||
while [ $# -gt 0 ]; do
|
||||
case $1 in
|
||||
CC=*)
|
||||
CC=${1#*=}
|
||||
shift
|
||||
;;
|
||||
RANLIB=*)
|
||||
RANLIB=${1#*=}
|
||||
shift
|
||||
;;
|
||||
AR=*)
|
||||
AR=${1#*=}
|
||||
shift
|
||||
;;
|
||||
*=*)
|
||||
print_usage
|
||||
;;
|
||||
*)
|
||||
break
|
||||
;;
|
||||
esac
|
||||
# Process our command line options.
|
||||
unset OPTIND
|
||||
while getopts ":hp:d:s:t:r:qci:b:-:" opt; do
|
||||
case $opt in
|
||||
-)
|
||||
case "$OPTARG" in
|
||||
help)
|
||||
print_usage
|
||||
;;
|
||||
quiet)
|
||||
quiet_flag=1
|
||||
;;
|
||||
prefix=*)
|
||||
prefix_flag=1
|
||||
install_prefix_user=${OPTARG#*=}
|
||||
;;
|
||||
libdir=*)
|
||||
libdir_flag=1
|
||||
install_libdir_user=${OPTARG#*=}
|
||||
;;
|
||||
includedir=*)
|
||||
incdir_flag=1
|
||||
install_incdir_user=${OPTARG#*=}
|
||||
;;
|
||||
sharedir=*)
|
||||
sharedir_flag=1
|
||||
install_sharedir_user=${OPTARG#*=}
|
||||
;;
|
||||
enable-debug)
|
||||
debug_flag=1
|
||||
debug_type=noopt
|
||||
;;
|
||||
enable-debug=*)
|
||||
debug_flag=1
|
||||
debug_type=${OPTARG#*=}
|
||||
;;
|
||||
disable-debug)
|
||||
debug_flag=0
|
||||
;;
|
||||
enable-verbose-make)
|
||||
enable_verbose='yes'
|
||||
;;
|
||||
disable-verbose-make)
|
||||
enable_verbose='no'
|
||||
;;
|
||||
enable-arg-max-hack)
|
||||
enable_arg_max_hack='yes'
|
||||
;;
|
||||
disable-arg-max-hack)
|
||||
enable_arg_max_hack='no'
|
||||
;;
|
||||
enable-static)
|
||||
enable_static='yes'
|
||||
;;
|
||||
disable-static)
|
||||
enable_static='no'
|
||||
;;
|
||||
enable-shared)
|
||||
enable_shared='yes'
|
||||
;;
|
||||
disable-shared)
|
||||
enable_shared='no'
|
||||
;;
|
||||
enable-threading=*)
|
||||
threading_model=${OPTARG#*=}
|
||||
;;
|
||||
thread-part-jrir=*)
|
||||
thread_part_jrir=${OPTARG#*=}
|
||||
;;
|
||||
disable-threading)
|
||||
threading_model='no'
|
||||
;;
|
||||
enable-packbuf-pools)
|
||||
enable_packbuf_pools='yes'
|
||||
;;
|
||||
disable-packbuf-pools)
|
||||
enable_packbuf_pools='no'
|
||||
;;
|
||||
enable-sandbox=*)
|
||||
sandbox_flag=1
|
||||
sandbox=${OPTARG#*=}
|
||||
;;
|
||||
disable-sandbox)
|
||||
sandbox_flag=0
|
||||
;;
|
||||
int-size=*)
|
||||
int_type_size=${OPTARG#*=}
|
||||
;;
|
||||
blas-int-size=*)
|
||||
blas_int_type_size=${OPTARG#*=}
|
||||
;;
|
||||
enable-blas)
|
||||
enable_blas='yes'
|
||||
;;
|
||||
disable-blas)
|
||||
enable_blas='no'
|
||||
;;
|
||||
enable-cblas)
|
||||
enable_cblas='yes'
|
||||
;;
|
||||
disable-cblas)
|
||||
enable_cblas='no'
|
||||
;;
|
||||
enable-mixed-dt)
|
||||
enable_mixed_dt='yes'
|
||||
;;
|
||||
disable-mixed-dt)
|
||||
enable_mixed_dt='no'
|
||||
;;
|
||||
enable-mixed-dt-extra-mem)
|
||||
enable_mixed_dt_extra_mem='yes'
|
||||
;;
|
||||
disable-mixed-dt-extra-mem)
|
||||
enable_mixed_dt_extra_mem='no'
|
||||
;;
|
||||
with-memkind)
|
||||
enable_memkind='yes'
|
||||
;;
|
||||
without-memkind)
|
||||
enable_memkind='no'
|
||||
;;
|
||||
force-version=*)
|
||||
force_version=${OPTARG#*=}
|
||||
;;
|
||||
show-config-list)
|
||||
show_config_list=1
|
||||
;;
|
||||
*)
|
||||
print_usage
|
||||
;;
|
||||
esac;;
|
||||
h)
|
||||
print_usage
|
||||
;;
|
||||
p)
|
||||
prefix_flag=1
|
||||
install_prefix_user=$OPTARG
|
||||
;;
|
||||
d)
|
||||
debug_flag=1
|
||||
debug_type=$OPTARG
|
||||
;;
|
||||
s)
|
||||
sandbox_flag=1
|
||||
sandbox=$OPTARG
|
||||
;;
|
||||
q)
|
||||
quiet_flag=1
|
||||
;;
|
||||
t)
|
||||
threading_model=$OPTARG
|
||||
;;
|
||||
r)
|
||||
thread_part_jrir=$OPTARG
|
||||
;;
|
||||
i)
|
||||
int_type_size=$OPTARG
|
||||
;;
|
||||
b)
|
||||
blas_int_type_size=$OPTARG
|
||||
;;
|
||||
c)
|
||||
show_config_list=1
|
||||
;;
|
||||
\?)
|
||||
print_usage
|
||||
;;
|
||||
esac
|
||||
done
|
||||
shift $(($OPTIND - 1))
|
||||
|
||||
# Parse environment variables
|
||||
found=false
|
||||
while [ $# -gt 0 ]; do
|
||||
case $1 in
|
||||
*=*)
|
||||
var=`expr "$1" : '\([^=]*\)='`
|
||||
value=`expr "$1" : '[^=]*=\(.*\)'`
|
||||
eval $var=\$value
|
||||
export $var
|
||||
shift
|
||||
found=true
|
||||
;;
|
||||
*)
|
||||
break
|
||||
;;
|
||||
esac
|
||||
done
|
||||
done
|
||||
|
||||
|
||||
# -- Check the operating system --------------------------------------------
|
||||
|
||||
os_name=$(uname -s)
|
||||
os_vers=$(uname -r)
|
||||
echo "${script_name}: detected ${os_name} kernel version ${os_vers}."
|
||||
|
||||
# Define a single variable off of which we can branch to tell if we are
|
||||
# building for Windows.
|
||||
is_win=no
|
||||
if [[ $os_name == MSYS* ]] || [[ $os_name == MINGW* ]] || [[ $os_name == CYGWIN* ]] ;
|
||||
then
|
||||
is_win=yes
|
||||
fi
|
||||
# Define a single variable off of which we can branch to tell if we are
|
||||
# building for Windows.
|
||||
is_win=no
|
||||
if [[ $os_name == MSYS* ]] || \
|
||||
[[ $os_name == MINGW* ]] || \
|
||||
[[ $os_name == CYGWIN* ]] ; then
|
||||
is_win=yes
|
||||
fi
|
||||
|
||||
|
||||
# -- Find a python interpreter ---------------------------------------------
|
||||
|
||||
# Acquire the python search order. This may vary based on the os found
|
||||
# above.
|
||||
python_search_list=$(get_python_search_list)
|
||||
|
||||
echo "${script_name}: python interpeter search list is: ${python_search_list}."
|
||||
|
||||
# Find a working python interpreter.
|
||||
found_python=$(select_tool "${python_search_list}" "${PYTHON}")
|
||||
|
||||
# If we didn't find any working python interpreters, we print an error
|
||||
# message.
|
||||
if [ -z "${found_python}" ]; then
|
||||
echo "${script_name}: *** Could not find working python interperter! Cannot continue."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "${script_name}: using '${found_python}' python interpreter."
|
||||
|
||||
|
||||
# -- Check the python version ----------------------------------------------
|
||||
|
||||
# Check the python interpreter's version.
|
||||
get_python_version
|
||||
check_python
|
||||
|
||||
|
||||
# -- Find a C compiler -----------------------------------------------------
|
||||
|
||||
# Acquire the compiler search order. This will vary based on the os
|
||||
# found above.
|
||||
# Acquire the compiler search order. This will vary based on the os found
|
||||
# above.
|
||||
cc_search_list=$(get_cc_search_list)
|
||||
|
||||
echo "${script_name}: C compiler search list is: ${cc_search_list}."
|
||||
|
||||
# Find a working C compiler.
|
||||
found_cc=$(select_cc "${cc_search_list}" "${CC}")
|
||||
found_cc=$(select_tool "${cc_search_list}" "${CC}")
|
||||
|
||||
# If we didn't find any working C compilers, we print an error message.
|
||||
if [ -z "${found_cc}" ]; then
|
||||
@@ -1912,9 +2022,9 @@ main()
|
||||
|
||||
echo "${script_name}: C++ compiler search list is: ${cxx_search_list}."
|
||||
|
||||
# Find a working C++ compiler. NOTE: We can reuse the select_cc()
|
||||
# Find a working C++ compiler. NOTE: We can reuse the select_tool()
|
||||
# function since it is written in a way that is general-purpose.
|
||||
found_cxx=$(select_cc "${cxx_search_list}" "${CXX}")
|
||||
found_cxx=$(select_tool "${cxx_search_list}" "${CXX}")
|
||||
|
||||
# If we didn't find any working C++ compilers, we print an error message.
|
||||
if [ -z "${found_cxx}" ]; then
|
||||
@@ -2558,10 +2668,17 @@ main()
|
||||
ranlib_esc=$(echo "${RANLIB:-ranlib}" | sed 's/\//\\\//g')
|
||||
# For AR, if the variable is not set, we use a default value of 'ar'.
|
||||
ar_esc=$(echo "${AR:-ar}" | sed 's/\//\\\//g')
|
||||
libpthread_esc=$(echo "${LIBPTHREAD:--lpthread}" | sed 's/\//\\\//g')
|
||||
libpthread_esc=$(echo "${LIBPTHREAD--lpthread}" | sed 's/\//\\\//g')
|
||||
cflags_preset_esc=$(echo "${cflags_preset}" | sed 's/\//\\\//g')
|
||||
ldflags_preset_esc=$(echo "${ldflags_preset}" | sed 's/\//\\\//g')
|
||||
|
||||
# For Windows builds, clear the libpthread_esc variable so that
|
||||
# no pthreads library is substituted into config.mk. (Windows builds
|
||||
# employ an implementation of pthreads that is internal to BLIS.)
|
||||
if [ $is_win = yes ]; then
|
||||
libpthread_esc=
|
||||
fi
|
||||
|
||||
# Typically, there are no slashes in the version variable. However,
|
||||
# downstream maintainers (such as those for Debian) may create custom
|
||||
# tags in their local clones such as "upstream/0.4.1", which obviously
|
||||
|
||||
@@ -597,7 +597,7 @@ Notes for interpreting function descriptions:
|
||||
* **[Level-1v](BLISObjectAPI.md#level-1v-operations)**: Operations on vectors:
|
||||
* [addv](BLISObjectAPI.md#addv), [amaxv](BLISObjectAPI.md#amaxv), [axpyv](BLISObjectAPI.md#axpyv), [axpbyv](BLISObjectAPI.md#axpbyv), [copyv](BLISObjectAPI.md#copyv), [dotv](BLISObjectAPI.md#dotv), [dotxv](BLISObjectAPI.md#dotxv), [invertv](BLISObjectAPI.md#invertv), [scal2v](BLISObjectAPI.md#scal2v), [scalv](BLISObjectAPI.md#scalv), [setv](BLISObjectAPI.md#setv), [setrv](BLISObjectAPI.md#setrv), [setiv](BLISObjectAPI.md#setiv), [subv](BLISObjectAPI.md#subv), [swapv](BLISObjectAPI.md#swapv), [xpbyv](BLISObjectAPI.md#xpbyv)
|
||||
* **[Level-1d](BLISObjectAPI.md#level-1d-operations)**: Element-wise operations on matrix diagonals:
|
||||
* [addd](BLISObjectAPI.md#addd), [axpyd](BLISObjectAPI.md#axpyd), [copyd](BLISObjectAPI.md#copyd), [invertd](BLISObjectAPI.md#invertd), [scald](BLISObjectAPI.md#scald), [scal2d](BLISObjectAPI.md#scal2d), [setd](BLISObjectAPI.md#setd), [setid](BLISObjectAPI.md#setid), [subd](BLISObjectAPI.md#subd)
|
||||
* [addd](BLISObjectAPI.md#addd), [axpyd](BLISObjectAPI.md#axpyd), [copyd](BLISObjectAPI.md#copyd), [invertd](BLISObjectAPI.md#invertd), [scald](BLISObjectAPI.md#scald), [scal2d](BLISObjectAPI.md#scal2d), [setd](BLISObjectAPI.md#setd), [setid](BLISObjectAPI.md#setid), [shiftd](BLISObjectAPI.md#shiftd), [subd](BLISObjectAPI.md#subd), [xpbyd](BLISObjectAPI.md#xpbyd)
|
||||
* **[Level-1m](BLISObjectAPI.md#level-1m-operations)**: Element-wise operations on matrices:
|
||||
* [addm](BLISObjectAPI.md#addm), [axpym](BLISObjectAPI.md#axpym), [copym](BLISObjectAPI.md#copym), [scalm](BLISObjectAPI.md#scalm), [scal2m](BLISObjectAPI.md#scal2m), [setm](BLISObjectAPI.md#setm), [setrm](BLISObjectAPI.md#setrm), [setim](BLISObjectAPI.md#setim), [subm](BLISObjectAPI.md#subm)
|
||||
* **[Level-1f](BLISObjectAPI.md#level-1f-operations)**: Fused operations on multiple vectors:
|
||||
@@ -771,6 +771,8 @@ Perform
|
||||
```
|
||||
where `x` is a vector of length _n_, and `alpha` is a scalar.
|
||||
|
||||
Observed object properties: `conj?(alpha)`.
|
||||
|
||||
---
|
||||
|
||||
#### scal2v
|
||||
@@ -788,6 +790,8 @@ Perform
|
||||
```
|
||||
where `x` and `y` are vectors of length _n_, and `alpha` is a scalar.
|
||||
|
||||
Observed object properties: `conj?(alpha)`, `conj?(x)`.
|
||||
|
||||
---
|
||||
|
||||
#### setv
|
||||
@@ -804,6 +808,8 @@ Perform
|
||||
```
|
||||
That is, set all elements of an _n_-length vector `x` to scalar `conj?(alpha)`.
|
||||
|
||||
Observed object properties: `conj?(alpha)`.
|
||||
|
||||
---
|
||||
|
||||
#### setrv
|
||||
@@ -856,6 +862,8 @@ Perform
|
||||
```
|
||||
where `x` and `y` are vectors of length _n_.
|
||||
|
||||
Observed object properties: `conj?(x)`.
|
||||
|
||||
---
|
||||
|
||||
#### swapv
|
||||
@@ -885,6 +893,8 @@ Perform
|
||||
```
|
||||
where `x` and `y` are vectors of length _n_, and `beta` is a scalar.
|
||||
|
||||
Observed object properties: `conj?(beta)`, `conj?(x)`.
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -908,6 +918,8 @@ void bli_addd
|
||||
);
|
||||
```
|
||||
|
||||
Observed object properties: `diagoff(A)`, `diag(A)`, `trans?(A)`.
|
||||
|
||||
---
|
||||
|
||||
#### axpyd
|
||||
@@ -920,6 +932,8 @@ void bli_axpyd
|
||||
);
|
||||
```
|
||||
|
||||
Observed object properties: `conj?(alpha)`, `diagoff(A)`, `diag(A)`, `trans?(A)`.
|
||||
|
||||
---
|
||||
|
||||
#### copyd
|
||||
@@ -931,6 +945,8 @@ void bli_copyd
|
||||
);
|
||||
```
|
||||
|
||||
Observed object properties: `diagoff(A)`, `diag(A)`, `trans?(A)`.
|
||||
|
||||
---
|
||||
|
||||
#### invertd
|
||||
@@ -941,6 +957,8 @@ void bli_invertd
|
||||
);
|
||||
```
|
||||
|
||||
Observed object properties: `diagoff(A)`.
|
||||
|
||||
---
|
||||
|
||||
#### scald
|
||||
@@ -952,6 +970,8 @@ void bli_scald
|
||||
);
|
||||
```
|
||||
|
||||
Observed object properties: `conj?(alpha)`, `diagoff(A)`.
|
||||
|
||||
---
|
||||
|
||||
#### scal2d
|
||||
@@ -964,6 +984,8 @@ void bli_scal2d
|
||||
);
|
||||
```
|
||||
|
||||
Observed object properties: `conj?(alpha)`, `diagoff(A)`, `diag(A)`, `trans?(A)`.
|
||||
|
||||
---
|
||||
|
||||
#### setd
|
||||
@@ -975,6 +997,8 @@ void bli_setd
|
||||
);
|
||||
```
|
||||
|
||||
Observed object properties: `conj?(alpha)`, `diagoff(A)`, `diag(A)`.
|
||||
|
||||
---
|
||||
|
||||
#### setid
|
||||
@@ -985,7 +1009,26 @@ void bli_setid
|
||||
obj_t* a
|
||||
);
|
||||
```
|
||||
Set the imaginary components of a matrix diagonal to a scalar `alpha`.
|
||||
Set the imaginary components of every element along the diagonal of `a`
|
||||
to a scalar `alpha`.
|
||||
Note that the datatype of `alpha` must be the real projection of the datatype
|
||||
of `a`.
|
||||
|
||||
Observed object properties: `diagoff(A)`.
|
||||
|
||||
---
|
||||
|
||||
#### shiftd
|
||||
```c
|
||||
void bli_shiftd
|
||||
(
|
||||
obj_t* alpha,
|
||||
obj_t* a
|
||||
);
|
||||
```
|
||||
Add a constant value `alpha` to every element along the diagonal of `a`.
|
||||
|
||||
Observed object properties: `diagoff(A)`.
|
||||
|
||||
---
|
||||
|
||||
@@ -998,6 +1041,22 @@ void bli_subd
|
||||
);
|
||||
```
|
||||
|
||||
Observed object properties: `diagoff(A)`, `diag(A)`, `trans?(A)`.
|
||||
|
||||
---
|
||||
|
||||
#### xpbyd
|
||||
```c
|
||||
void bli_xpbyd
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* beta,
|
||||
obj_t* b
|
||||
);
|
||||
```
|
||||
|
||||
Observed object properties: `conj?(beta)`, `diagoff(A)`, `diag(A)`, `trans?(A)`.
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
@@ -195,7 +195,7 @@ Notes for interpreting function descriptions:
|
||||
* **[Level-1v](BLISTypedAPI.md#level-1v-operations)**: Operations on vectors:
|
||||
* [addv](BLISTypedAPI.md#addv), [amaxv](BLISTypedAPI.md#amaxv), [axpyv](BLISTypedAPI.md#axpyv), [axpbyv](BLISTypedAPI.md#axpbyv), [copyv](BLISTypedAPI.md#copyv), [dotv](BLISTypedAPI.md#dotv), [dotxv](BLISTypedAPI.md#dotxv), [invertv](BLISTypedAPI.md#invertv), [scal2v](BLISTypedAPI.md#scal2v), [scalv](BLISTypedAPI.md#scalv), [setv](BLISTypedAPI.md#setv), [subv](BLISTypedAPI.md#subv), [swapv](BLISTypedAPI.md#swapv), [xpbyv](BLISTypedAPI.md#xpbyv)
|
||||
* **[Level-1d](BLISTypedAPI.md#level-1d-operations)**: Element-wise operations on matrix diagonals:
|
||||
* [addd](BLISTypedAPI.md#addd), [axpyd](BLISTypedAPI.md#axpyd), [copyd](BLISTypedAPI.md#copyd), [invertd](BLISTypedAPI.md#invertd), [scald](BLISTypedAPI.md#scald), [scal2d](BLISTypedAPI.md#scal2d), [setd](BLISTypedAPI.md#setd), [setid](BLISTypedAPI.md#setid), [subd](BLISTypedAPI.md#subd)
|
||||
* [addd](BLISTypedAPI.md#addd), [axpyd](BLISTypedAPI.md#axpyd), [copyd](BLISTypedAPI.md#copyd), [invertd](BLISTypedAPI.md#invertd), [scald](BLISTypedAPI.md#scald), [scal2d](BLISTypedAPI.md#scal2d), [setd](BLISTypedAPI.md#setd), [setid](BLISTypedAPI.md#setid), [shiftd](BLISTypedAPI.md#shiftd), [subd](BLISTypedAPI.md#subd), [xpbyd](BLISTypedAPI.md#xpbyd)
|
||||
* **[Level-1m](BLISTypedAPI.md#level-1m-operations)**: Element-wise operations on matrices:
|
||||
* [addm](BLISTypedAPI.md#addm), [axpym](BLISTypedAPI.md#axpym), [copym](BLISTypedAPI.md#copym), [scalm](BLISTypedAPI.md#scalm), [scal2m](BLISTypedAPI.md#scal2m), [setm](BLISTypedAPI.md#setm), [subm](BLISTypedAPI.md#subm)
|
||||
* **[Level-1f](BLISTypedAPI.md#level-1f-operations)**: Fused operations on multiple vectors:
|
||||
@@ -476,7 +476,7 @@ where `x` and `y` are vectors of length _n_, and `beta` is a scalar.
|
||||
|
||||
Level-1d operations perform various level-1 BLAS-like operations on matrix diagonals (hence the _d_).
|
||||
|
||||
These operations are similar to their level-1m counterparts, except they only read and update matrix diagonals and therefore do not take any `uplo` arguments. Please see the descriptions for the corresponding level-1m operation for a description of the arguments.
|
||||
Most of these operations are similar to level-1m counterparts, except they only read and update matrix diagonals and therefore do not take any `uplo` arguments. Please see the descriptions for the corresponding level-1m operation for a description of the arguments.
|
||||
|
||||
---
|
||||
|
||||
@@ -592,6 +592,24 @@ void bli_?setd
|
||||
#### setid
|
||||
```c
|
||||
void bli_?setid
|
||||
(
|
||||
doff_t diagoffa,
|
||||
dim_t m,
|
||||
dim_t n,
|
||||
ctype_r* alpha,
|
||||
ctype* a, inc_t rsa, inc_t csa
|
||||
);
|
||||
```
|
||||
Set the imaginary components of every element along the diagonal of `a`, as
|
||||
specified by `diagoffa`, to a scalar `alpha`.
|
||||
Note that the datatype of `alpha` must be the real projection of the datatype
|
||||
of `a`.
|
||||
|
||||
---
|
||||
|
||||
#### shiftd
|
||||
```c
|
||||
void bli_?shiftd
|
||||
(
|
||||
doff_t diagoffa,
|
||||
dim_t m,
|
||||
@@ -600,7 +618,8 @@ void bli_?setid
|
||||
ctype* a, inc_t rsa, inc_t csa
|
||||
);
|
||||
```
|
||||
Set the imaginary components of a matrix diagonal to a scalar `alpha`.
|
||||
Add a constant value `alpha` to every element along the diagonal of `a`, as
|
||||
specified by `diagoffa`.
|
||||
|
||||
---
|
||||
|
||||
@@ -620,6 +639,23 @@ void bli_?subd
|
||||
|
||||
---
|
||||
|
||||
#### xpbyd
|
||||
```c
|
||||
void bli_?xpbyd
|
||||
(
|
||||
doff_t diagoffa,
|
||||
diag_t diaga,
|
||||
trans_t transa,
|
||||
dim_t m,
|
||||
dim_t n,
|
||||
ctype* a, inc_t rsa, inc_t csa,
|
||||
ctype* beta,
|
||||
ctype* b, inc_t rsb, inc_t csb
|
||||
);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
|
||||
|
||||
## Level-1m operations
|
||||
|
||||
@@ -22,10 +22,10 @@ The BLIS build system was designed for use with GNU/Linux (or some other sane UN
|
||||
|
||||
* Python (2.7 or later)
|
||||
* GNU `bash` (3.2 or later)
|
||||
* GNU `make`
|
||||
* a working C compiler
|
||||
* GNU `make` (3.81 or later)
|
||||
* a working C99 compiler
|
||||
|
||||
BLIS also requires a POSIX threads library at link-time (`-lpthread` or `libpthread.so`). This requirement holds even when configuring BLIS with multithreading disabled (the default) or with multithreading via OpenMP (`--enable-multithreading=openmp`).
|
||||
BLIS also requires a POSIX threads library at link-time (`-lpthread` or `libpthread.so`). This requirement holds even when configuring BLIS with multithreading disabled (the default) or with multithreading via OpenMP (`--enable-threading=openmp`). (Note: BLIS implements basic pthreads functionality automatically for Windows builds via [AppVeyor](https://ci.appveyor.com/project/shpc/blis/).)
|
||||
|
||||
Finally, we also require various other shell utilities that are so ubiquitous that they are not worth mentioning (such as `mv`, `mkdir`, `find`, and so forth). If you are missing these utilities, then you have much bigger problems than not being able to build BLIS.
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ int main( int argc, char** argv )
|
||||
dim_t m, n;
|
||||
inc_t rs, cs;
|
||||
|
||||
obj_t a, x, y, b, d;
|
||||
obj_t a, x, y, b;
|
||||
obj_t* alpha;
|
||||
obj_t* beta;
|
||||
|
||||
@@ -297,9 +297,7 @@ int main( int argc, char** argv )
|
||||
// Load the diagonal. By setting the diagonal to something of greater
|
||||
// absolute value than the off-diagonal elements, we increase the odds
|
||||
// that the matrix is not singular (singular matrices have no inverse).
|
||||
bli_obj_create( dt, m, m, 0, 0, &d );
|
||||
bli_setd( &BLIS_TWO, &d );
|
||||
bli_addd( &d, &a );
|
||||
bli_shiftd( &BLIS_TWO, &a );
|
||||
|
||||
bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" );
|
||||
bli_printm( "b: initial value", &b, "%4.1f", "" );
|
||||
@@ -320,7 +318,6 @@ int main( int argc, char** argv )
|
||||
// Free the objects.
|
||||
bli_obj_free( &a );
|
||||
bli_obj_free( &b );
|
||||
bli_obj_free( &d );
|
||||
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -42,7 +42,7 @@ int main( int argc, char** argv )
|
||||
inc_t rs, cs;
|
||||
side_t side;
|
||||
|
||||
obj_t a, b, c, d;
|
||||
obj_t a, b, c;
|
||||
obj_t* alpha;
|
||||
obj_t* beta;
|
||||
|
||||
@@ -299,9 +299,7 @@ int main( int argc, char** argv )
|
||||
// Load the diagonal. By setting the diagonal to something of greater
|
||||
// absolute value than the off-diagonal elements, we increase the odds
|
||||
// that the matrix is not singular (singular matrices have no inverse).
|
||||
bli_obj_create( dt, m, m, 0, 0, &d );
|
||||
bli_setd( &BLIS_TWO, &d );
|
||||
bli_addd( &d, &a );
|
||||
bli_shiftd( &BLIS_TWO, &a );
|
||||
|
||||
bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" );
|
||||
bli_printm( "b: initial value", &b, "%4.1f", "" );
|
||||
@@ -323,7 +321,6 @@ int main( int argc, char** argv )
|
||||
bli_obj_free( &a );
|
||||
bli_obj_free( &b );
|
||||
bli_obj_free( &c );
|
||||
bli_obj_free( &d );
|
||||
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -41,7 +41,6 @@ int main( int argc, char** argv )
|
||||
double* x;
|
||||
double* y;
|
||||
double* b;
|
||||
double* d;
|
||||
double alpha, beta;
|
||||
dim_t m, n;
|
||||
inc_t rs, cs;
|
||||
@@ -286,10 +285,7 @@ int main( int argc, char** argv )
|
||||
// Load the diagonal. By setting the diagonal to something of greater
|
||||
// absolute value than the off-diagonal elements, we increase the odds
|
||||
// that the matrix is not singular (singular matrices have no inverse).
|
||||
d = malloc( m * m * sizeof( double ) );
|
||||
bli_dsetd( BLIS_NO_CONJUGATE, 0, m, m, &two, d, 1, m );
|
||||
bli_daddd( 0, BLIS_NONUNIT_DIAG, BLIS_NO_TRANSPOSE,
|
||||
m, m, d, 1, m, a, rs, cs );
|
||||
bli_dshiftd( 0, m, m, &two, a, rs, cs );
|
||||
|
||||
bli_dprintm( "a: randomized (zeros in upper triangle)", m, m, a, rs, cs, "%4.1f", "" );
|
||||
bli_dprintm( "b: intial value", 1, m, b, m, 1, "%4.1f", "" );
|
||||
|
||||
@@ -45,7 +45,6 @@ int main( int argc, char** argv )
|
||||
double* a;
|
||||
double* b;
|
||||
double* c;
|
||||
double* d;
|
||||
double alpha, beta;
|
||||
|
||||
// Initialize some basic constants.
|
||||
@@ -311,10 +310,7 @@ int main( int argc, char** argv )
|
||||
// Load the diagonal. By setting the diagonal to something of greater
|
||||
// absolute value than the off-diagonal elements, we increase the odds
|
||||
// that the matrix is not singular (singular matrices have no inverse).
|
||||
d = malloc( m * m * sizeof( double ) );
|
||||
bli_dsetd( BLIS_NO_CONJUGATE, 0, m, m, &two, d, 1, m );
|
||||
bli_daddd( 0, BLIS_NONUNIT_DIAG, BLIS_NO_TRANSPOSE,
|
||||
m, m, d, 1, m, a, rsa, csa );
|
||||
bli_dshiftd( 0, m, m, &two, a, rsa, csa );
|
||||
|
||||
bli_dprintm( "a: randomized (zeros in upper triangle)", m, m, a, rsa, csa, "%4.1f", "" );
|
||||
bli_dprintm( "b: initial value", m, n, b, rsb, csb, "%4.1f", "" );
|
||||
@@ -339,7 +335,6 @@ int main( int argc, char** argv )
|
||||
free( a );
|
||||
free( b );
|
||||
free( c );
|
||||
free( d );
|
||||
|
||||
|
||||
return 0;
|
||||
|
||||
@@ -101,6 +101,7 @@ void PASTEMAC(opname,_check) \
|
||||
GENFRONT( scald )
|
||||
GENFRONT( setd )
|
||||
GENFRONT( setid )
|
||||
GENFRONT( shiftd )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
|
||||
@@ -88,6 +88,7 @@ void PASTEMAC(opname,_check) \
|
||||
GENTPROT( scald )
|
||||
GENTPROT( setd )
|
||||
GENTPROT( setid )
|
||||
GENTPROT( shiftd )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
|
||||
@@ -59,5 +59,6 @@ GENFRONT( invertd )
|
||||
GENFRONT( scald )
|
||||
GENFRONT( setd )
|
||||
GENFRONT( setid )
|
||||
GENFRONT( shiftd )
|
||||
GENFRONT( xpbyd )
|
||||
|
||||
|
||||
@@ -51,5 +51,6 @@ GENPROT( invertd )
|
||||
GENPROT( scald )
|
||||
GENPROT( setd )
|
||||
GENPROT( setid )
|
||||
GENPROT( shiftd )
|
||||
GENPROT( xpbyd )
|
||||
|
||||
|
||||
@@ -131,6 +131,23 @@ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \
|
||||
|
||||
INSERT_GENTDEFR( setid )
|
||||
|
||||
// shiftd
|
||||
|
||||
#undef GENTDEF
|
||||
#define GENTDEF( ctype, ch, opname, tsuf ) \
|
||||
\
|
||||
typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \
|
||||
( \
|
||||
doff_t diagoffx, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t rs_x, inc_t cs_x \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
);
|
||||
|
||||
INSERT_GENTDEF( shiftd )
|
||||
|
||||
// xpbyd
|
||||
|
||||
#undef GENTDEF
|
||||
|
||||
@@ -239,7 +239,7 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
/* Create local copy-casts of scalars (and apply internal conjugation
|
||||
as needed). */ \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
alpha, &alpha_local ); \
|
||||
alpha, &alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
\
|
||||
/* Query a type-specific function pointer, except one that uses
|
||||
@@ -312,6 +312,62 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
GENFRONT( setid )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
void PASTEMAC(opname,EX_SUF) \
|
||||
( \
|
||||
obj_t* alpha, \
|
||||
obj_t* x \
|
||||
BLIS_OAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_OAPI_EX_DECLS \
|
||||
\
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
doff_t diagoffx = bli_obj_diag_offset( x ); \
|
||||
dim_t m = bli_obj_length( x ); \
|
||||
dim_t n = bli_obj_width( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( x ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
\
|
||||
obj_t alpha_local; \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( alpha, x ); \
|
||||
\
|
||||
/* Create local copy-casts of scalars (and apply internal conjugation
|
||||
as needed). */ \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
alpha, &alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
\
|
||||
/* Query a type-specific function pointer, except one that uses
|
||||
void* instead of typed pointers. */ \
|
||||
PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \
|
||||
PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
|
||||
\
|
||||
f \
|
||||
( \
|
||||
diagoffx, \
|
||||
m, \
|
||||
n, \
|
||||
buf_alpha, \
|
||||
buf_x, rs_x, cs_x, \
|
||||
cntx, \
|
||||
rntm \
|
||||
); \
|
||||
}
|
||||
|
||||
GENFRONT( shiftd )
|
||||
|
||||
|
||||
#undef GENFRONT
|
||||
#define GENFRONT( opname ) \
|
||||
\
|
||||
|
||||
@@ -92,6 +92,7 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
GENTPROT( scald )
|
||||
GENTPROT( setd )
|
||||
GENTPROT( setid )
|
||||
GENTPROT( shiftd )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
|
||||
@@ -387,6 +387,64 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
INSERT_GENTFUNCR_BASIC2( setid, setv, BLIS_SETV_KER )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname, kername, kerid ) \
|
||||
\
|
||||
void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
( \
|
||||
doff_t diagoffx, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t rs_x, inc_t cs_x \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
BLIS_TAPI_EX_DECLS \
|
||||
\
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
\
|
||||
ctype* x1; \
|
||||
dim_t n_elem; \
|
||||
dim_t offx; \
|
||||
inc_t incx; \
|
||||
\
|
||||
if ( bli_zero_dim2( m, n ) ) return; \
|
||||
\
|
||||
if ( bli_is_outside_diag( diagoffx, BLIS_NO_TRANSPOSE, m, n ) ) return; \
|
||||
\
|
||||
/* Determine the distance to the diagonals, the number of diagonal
|
||||
elements, and the diagonal increments. */ \
|
||||
bli_set_dims_incs_1d \
|
||||
( \
|
||||
diagoffx, \
|
||||
m, n, rs_x, cs_x, \
|
||||
&offx, &n_elem, &incx \
|
||||
); \
|
||||
\
|
||||
x1 = x + offx; \
|
||||
\
|
||||
/* Obtain a valid context from the gks if necessary. */ \
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
|
||||
\
|
||||
/* Query the context for the operation's kernel address. */ \
|
||||
PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \
|
||||
\
|
||||
/* Invoke the kernel with the appropriate parameters. */ \
|
||||
f( \
|
||||
BLIS_NO_CONJUGATE, \
|
||||
n_elem, \
|
||||
alpha, 0, \
|
||||
x1, incx, \
|
||||
cntx \
|
||||
); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC2( shiftd, addv, BLIS_ADDV_KER )
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname, kername, kerid ) \
|
||||
\
|
||||
|
||||
@@ -126,6 +126,22 @@ void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
INSERT_GENTPROTR_BASIC0( setid )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
void PASTEMAC2(ch,opname,EX_SUF) \
|
||||
( \
|
||||
doff_t diagoffx, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
ctype* alpha, \
|
||||
ctype* x, inc_t rs_x, inc_t cs_x \
|
||||
BLIS_TAPI_EX_PARAMS \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC0( shiftd )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
|
||||
@@ -36,10 +36,7 @@
|
||||
#ifndef BLIS_CONFIGURETIME_CPUID
|
||||
#include "blis.h"
|
||||
#else
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <pthread.h>
|
||||
#include "bli_system.h"
|
||||
#include "bli_type_defs.h"
|
||||
#include "bli_arch.h"
|
||||
#include "bli_cpuid.h"
|
||||
@@ -67,7 +64,9 @@ static pthread_once_t once_id = PTHREAD_ONCE_INIT;
|
||||
|
||||
void bli_arch_set_id_once( void )
|
||||
{
|
||||
#ifndef BLIS_CONFIGURETIME_CPUID
|
||||
pthread_once( &once_id, bli_arch_set_id );
|
||||
#endif
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
@@ -35,10 +35,7 @@
|
||||
|
||||
#if 0
|
||||
// Used only during standalone testing of ARM support.
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "bli_system.h"
|
||||
#include "bli_type_defs.h"
|
||||
#include "bli_cpuid.h"
|
||||
#undef __x86_64__
|
||||
@@ -51,10 +48,7 @@
|
||||
#ifndef BLIS_CONFIGURETIME_CPUID
|
||||
#include "blis.h"
|
||||
#else
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <pthread.h>
|
||||
#include "bli_system.h"
|
||||
#include "bli_type_defs.h"
|
||||
#include "bli_cpuid.h"
|
||||
#endif
|
||||
|
||||
198
frame/base/bli_pthread_wrap.c
Normal file
198
frame/base/bli_pthread_wrap.c
Normal file
@@ -0,0 +1,198 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2018, Southern Methodist University
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
#if defined(_MSC_VER) && !defined(BLIS_ENABLE_PTHREADS)
|
||||
|
||||
int pthread_mutex_init( pthread_mutex_t* mutex,
|
||||
const pthread_mutexattr_t* attr )
|
||||
{
|
||||
if ( attr ) return EINVAL;
|
||||
InitializeSRWLock( mutex );
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Destroy a mutex. Nothing is released here, so this is a no-op that always
// returns 0.
int pthread_mutex_destroy( pthread_mutex_t* mutex )
{
	// Explicitly mark the parameter as unused, matching pthread_cond_destroy()
	// and avoiding unused-parameter warnings.
	( void )mutex;
	return 0;
}
|
||||
|
||||
// Acquire the mutex by taking the underlying SRW lock in exclusive mode.
// Always returns 0.
int pthread_mutex_lock( pthread_mutex_t* mutex )
{
	const int status = 0;

	AcquireSRWLockExclusive( mutex );

	return status;
}
|
||||
|
||||
// Try to acquire the mutex without blocking. Returns 0 if the lock was
// obtained and EBUSY otherwise.
int pthread_mutex_trylock( pthread_mutex_t* mutex )
{
	if ( TryAcquireSRWLockExclusive( mutex ) )
	{
		return 0;
	}

	return EBUSY;
}
|
||||
|
||||
// Release the mutex by releasing the exclusively-held SRW lock.
// Always returns 0.
int pthread_mutex_unlock( pthread_mutex_t* mutex )
{
	const int status = 0;

	ReleaseSRWLockExclusive( mutex );

	return status;
}
|
||||
|
||||
static BOOL bli_init_once_wrapper( pthread_once_t* once,
|
||||
void* param,
|
||||
void** context)
|
||||
{
|
||||
( void )once;
|
||||
( void )context;
|
||||
typedef void (*callback)( void );
|
||||
((callback)param)();
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
// Run 'init' via InitOnceExecuteOnce(), keyed on the control object 'once'.
// The initializer is passed to the trampoline bli_init_once_wrapper() as its
// parameter; no context is requested.
void pthread_once( pthread_once_t* once, void (*init)(void) )
{
	InitOnceExecuteOnce
	(
	  once,
	  bli_init_once_wrapper,
	  init,
	  NULL
	);
}
|
||||
|
||||
int pthread_cond_init( pthread_cond_t* cond,
|
||||
const pthread_condattr_t* attr )
|
||||
{
|
||||
if ( attr ) return EINVAL;
|
||||
InitializeConditionVariable( cond );
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Destroy a condition variable. Nothing is released here, so this is a no-op
// that always returns 0.
int pthread_cond_destroy( pthread_cond_t* cond )
{
	const int status = 0;

	// The argument is intentionally unused.
	( void )cond;

	return status;
}
|
||||
|
||||
int pthread_cond_wait( pthread_cond_t* cond,
|
||||
pthread_mutex_t* mutex )
|
||||
{
|
||||
if ( !SleepConditionVariableSRW( cond, mutex, INFINITE, 0 ) ) return EAGAIN;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Wake every thread currently waiting on 'cond'. Always returns 0.
int pthread_cond_broadcast( pthread_cond_t* cond )
{
	const int status = 0;

	WakeAllConditionVariable( cond );

	return status;
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
void* (*start_routine)( void* );
|
||||
void* param;
|
||||
void** retval;
|
||||
} bli_thread_param;
|
||||
|
||||
static DWORD bli_thread_func( void* param_ )
|
||||
{
|
||||
bli_thread_param* param = param_;
|
||||
*param->retval = param->start_routine( param->param );
|
||||
return 0;
|
||||
}
|
||||
|
||||
int pthread_create( pthread_t* thread,
|
||||
const pthread_attr_t* attr,
|
||||
void* (*start_routine)(void*),
|
||||
void* arg )
|
||||
{
|
||||
if ( attr ) return EINVAL;
|
||||
bli_thread_param param = { start_routine, arg, &thread->retval };
|
||||
thread->handle = CreateThread( NULL, 0, bli_thread_func, ¶m, 0, NULL );
|
||||
if ( !thread->handle ) return EAGAIN;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int pthread_join( pthread_t thread,
|
||||
void** retval )
|
||||
{
|
||||
if ( !WaitForSingleObject( thread.handle, INFINITE ) ) return EAGAIN;
|
||||
if ( retval ) *retval = thread.retval;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__) || defined(_MSC_VER)
|
||||
|
||||
int pthread_barrier_init( pthread_barrier_t* barrier,
|
||||
const pthread_barrierattr_t* attr,
|
||||
unsigned int count )
|
||||
{
|
||||
if ( attr ) return EINVAL;
|
||||
if ( count == 0 ) return EINVAL;
|
||||
|
||||
int err;
|
||||
if ( (err = pthread_mutex_init( &barrier->mutex, 0 )) != 0 ) return err;
|
||||
if ( (err = pthread_cond_init( &barrier->cond, 0 )) != 0 )
|
||||
{
|
||||
pthread_mutex_destroy( &barrier->mutex );
|
||||
return err;
|
||||
}
|
||||
barrier->tripCount = count;
|
||||
barrier->count = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Tear down a barrier by destroying its condition variable and then its
// mutex. The results of those calls are ignored; always returns 0.
int pthread_barrier_destroy( pthread_barrier_t *barrier )
{
	const int status = 0;

	pthread_cond_destroy( &barrier->cond );
	pthread_mutex_destroy( &barrier->mutex );

	return status;
}
|
||||
|
||||
// Block until tripCount threads have called this function on 'barrier'.
// The last thread to arrive returns 1; every other thread returns 0.
//
// Condition variables may wake spuriously, so waiters must re-check a
// predicate in a loop. The original implementation waited exactly once, which
// let a spuriously woken thread leave the barrier early. Testing
// "count == 0" is not a usable predicate either, because a fast thread may
// re-enter the barrier and bump the counter before a slow waiter wakes up.
//
// To get a reliable predicate without changing the layout of
// pthread_barrier_t, the phase (parity of the barrier cycle) is encoded in
// the sign of 'count':
//   - even phase: count ==  a       (a = number of arrivals so far, >= 0)
//   - odd  phase: count == -1 - a   (always negative)
// pthread_barrier_init() sets count to 0, i.e. even phase with no arrivals.
// One parity bit is enough: the next phase cannot complete until every
// waiter of the current phase has left and arrived again.
// (A dedicated generation field in the struct would be the cleaner option if
// the type can be changed.)
int pthread_barrier_wait( pthread_barrier_t *barrier )
{
	pthread_mutex_lock( &barrier->mutex );

	// Decode the current phase and account for this thread's arrival.
	const int odd_phase = ( barrier->count < 0 );
	const int arrived   = ( odd_phase ? -1 - barrier->count
	                                  : barrier->count ) + 1;

	if ( arrived >= barrier->tripCount )
	{
		// Last arrival: flip the phase (with zero arrivals) and release
		// all waiters.
		barrier->count = ( odd_phase ? 0 : -1 );
		pthread_cond_broadcast( &barrier->cond );
		pthread_mutex_unlock( &barrier->mutex );
		return 1;
	}

	// Record the arrival in the encoding of the current phase.
	barrier->count = ( odd_phase ? -1 - arrived : arrived );

	// Sleep until the phase flips; this loop absorbs spurious wakeups.
	while ( ( barrier->count < 0 ) == odd_phase )
	{
		pthread_cond_wait( &barrier->cond, &(barrier->mutex) );
	}

	pthread_mutex_unlock( &barrier->mutex );
	return 0;
}
|
||||
|
||||
#endif
|
||||
112
frame/base/bli_pthread_wrap.h
Normal file
112
frame/base/bli_pthread_wrap.h
Normal file
@@ -0,0 +1,112 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2018, Southern Methodist University
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_PTHREAD_WRAP_H
|
||||
#define BLIS_PTHREAD_WRAP_H
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
|
||||
typedef SRWLOCK pthread_mutex_t;
|
||||
typedef void pthread_mutexattr_t;
|
||||
|
||||
#define PTHREAD_MUTEX_INITIALIZER SRWLOCK_INIT
|
||||
|
||||
int pthread_mutex_init( pthread_mutex_t* mutex, const pthread_mutexattr_t *attr );
|
||||
|
||||
int pthread_mutex_destroy( pthread_mutex_t* mutex );
|
||||
|
||||
int pthread_mutex_lock( pthread_mutex_t* mutex );
|
||||
|
||||
int pthread_mutex_trylock( pthread_mutex_t* mutex );
|
||||
|
||||
int pthread_mutex_unlock( pthread_mutex_t* mutex );
|
||||
|
||||
typedef INIT_ONCE pthread_once_t;
|
||||
|
||||
#define PTHREAD_ONCE_INIT INIT_ONCE_STATIC_INIT
|
||||
|
||||
void pthread_once( pthread_once_t* once, void (*init)( void ) );
|
||||
|
||||
typedef CONDITION_VARIABLE pthread_cond_t;
|
||||
typedef void pthread_condattr_t;
|
||||
|
||||
#define PTHREAD_COND_INITIALIZER CONDITION_VARIABLE_INIT
|
||||
|
||||
int pthread_cond_init( pthread_cond_t* cond, const pthread_condattr_t* attr );
|
||||
|
||||
int pthread_cond_destroy( pthread_cond_t* cond );
|
||||
|
||||
int pthread_cond_wait( pthread_cond_t* cond, pthread_mutex_t* mutex );
|
||||
|
||||
int pthread_cond_broadcast( pthread_cond_t* cond );
|
||||
|
||||
typedef struct
|
||||
{
|
||||
HANDLE handle;
|
||||
void* retval;
|
||||
} pthread_t;
|
||||
|
||||
typedef void pthread_attr_t;
|
||||
|
||||
int pthread_create( pthread_t *thread, const pthread_attr_t *attr, void* (*start_routine)( void* ), void *arg );
|
||||
|
||||
int pthread_join( pthread_t thread, void **retval );
|
||||
|
||||
#else
|
||||
|
||||
#include <pthread.h>
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__APPLE__) || defined(_MSC_VER)
|
||||
|
||||
typedef void pthread_barrierattr_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
pthread_mutex_t mutex;
|
||||
pthread_cond_t cond;
|
||||
int count;
|
||||
int tripCount;
|
||||
} pthread_barrier_t;
|
||||
|
||||
int pthread_barrier_init( pthread_barrier_t *barrier, const pthread_barrierattr_t *attr, unsigned int count );
|
||||
|
||||
int pthread_barrier_destroy( pthread_barrier_t *barrier );
|
||||
|
||||
int pthread_barrier_wait( pthread_barrier_t *barrier );
|
||||
|
||||
#endif // defined(__APPLE__) || defined(_MSC_VER)
|
||||
|
||||
#endif
|
||||
@@ -103,7 +103,8 @@
|
||||
|
||||
// POSIX threads are unconditionally required, regardless of whether
|
||||
// multithreading is enabled via pthreads or OpenMP (or disabled).
|
||||
#include <pthread.h>
|
||||
// If pthreads is not available (Windows), then fake it.
|
||||
#include "bli_pthread_wrap.h"
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -915,7 +915,7 @@ typedef struct
|
||||
|
||||
// -- Memory broker object type --
|
||||
|
||||
#include <pthread.h>
|
||||
#include "bli_pthread_wrap.h"
|
||||
#include "bli_malloc.h"
|
||||
|
||||
typedef struct membrk_s
|
||||
|
||||
@@ -68,6 +68,7 @@ extern "C" {
|
||||
// -- Threading definitions --
|
||||
|
||||
#include "bli_thread.h"
|
||||
#include "bli_pthread.h"
|
||||
|
||||
|
||||
// -- Constant definitions --
|
||||
|
||||
146
frame/thread/bli_pthread.c
Normal file
146
frame/thread/bli_pthread.c
Normal file
@@ -0,0 +1,146 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2018, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
/*
|
||||
This wrapper API to pthreads is provided so that BLIS applications (such
|
||||
as the BLIS testsuite) can call pthreads functions without worrying about
|
||||
whether the implementation is external, provided via system library, or
|
||||
provided by BLIS. In theory, it shouldn't matter, right? pthreads is
|
||||
pthreads as long as the correct API is provided. However, in the
|
||||
situations where BLIS *does* decide that it needs to provide a pthreads
|
||||
implementation, such as on Windows, providing and exporting the API below
|
||||
means that BLIS does not need to export pthreads symbols. (To be clear:
|
||||
exporting pthreads symbols would work, but it seems ill-advised for BLIS
|
||||
to ever export functionality beyond that of which it advertises itself as
|
||||
providing: namely, BLAS-like and BLAS APIs.)
|
||||
*/
|
||||
|
||||
// -- pthread_create(), pthread_join() -----------------------------------------
|
||||
|
||||
int bli_pthread_create
|
||||
(
|
||||
bli_pthread_t* thread,
|
||||
const bli_pthread_attr_t* attr,
|
||||
void* (*start_routine)(void*),
|
||||
void* arg
|
||||
)
|
||||
{
|
||||
return pthread_create( thread, attr, start_routine, arg );
|
||||
}
|
||||
|
||||
int bli_pthread_join
|
||||
(
|
||||
bli_pthread_t thread,
|
||||
void** retval
|
||||
)
|
||||
{
|
||||
return pthread_join( thread, retval );
|
||||
}
|
||||
|
||||
// -- pthread_mutex_*() --------------------------------------------------------
|
||||
|
||||
int bli_pthread_mutex_init
|
||||
(
|
||||
bli_pthread_mutex_t* mutex,
|
||||
const bli_pthread_mutexattr_t* attr
|
||||
)
|
||||
{
|
||||
return pthread_mutex_init( mutex, attr );
|
||||
}
|
||||
|
||||
int bli_pthread_mutex_destroy
|
||||
(
|
||||
bli_pthread_mutex_t* mutex
|
||||
)
|
||||
{
|
||||
return pthread_mutex_destroy( mutex );
|
||||
}
|
||||
|
||||
int bli_pthread_mutex_lock
|
||||
(
|
||||
bli_pthread_mutex_t* mutex
|
||||
)
|
||||
{
|
||||
return pthread_mutex_lock( mutex );
|
||||
}
|
||||
|
||||
int bli_pthread_mutex_unlock
|
||||
(
|
||||
bli_pthread_mutex_t* mutex
|
||||
)
|
||||
{
|
||||
return pthread_mutex_lock( mutex );
|
||||
}
|
||||
|
||||
// -- pthread_barrier_*() ------------------------------------------------------
|
||||
|
||||
int bli_pthread_barrier_init
|
||||
(
|
||||
bli_pthread_barrier_t* barrier,
|
||||
const bli_pthread_barrierattr_t* attr,
|
||||
unsigned int count
|
||||
)
|
||||
{
|
||||
return pthread_barrier_init( barrier, attr, count );
|
||||
}
|
||||
|
||||
int bli_pthread_barrier_destroy
|
||||
(
|
||||
bli_pthread_barrier_t* barrier
|
||||
)
|
||||
{
|
||||
return pthread_barrier_destroy( barrier );
|
||||
}
|
||||
|
||||
int bli_pthread_barrier_wait
|
||||
(
|
||||
bli_pthread_barrier_t* barrier
|
||||
)
|
||||
{
|
||||
return pthread_barrier_wait( barrier );
|
||||
}
|
||||
|
||||
// -- pthread_once() -----------------------------------------------------------
|
||||
|
||||
void bli_pthread_once
|
||||
(
|
||||
bli_pthread_once_t* once,
|
||||
void (*init)(void)
|
||||
)
|
||||
{
|
||||
pthread_once( once, init );
|
||||
}
|
||||
|
||||
109
frame/thread/bli_pthread.h
Normal file
109
frame/thread/bli_pthread.h
Normal file
@@ -0,0 +1,109 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2018, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
// -- pthread types ------------------------------------------------------------
|
||||
|
||||
typedef pthread_t bli_pthread_t;
|
||||
typedef pthread_attr_t bli_pthread_attr_t;
|
||||
typedef pthread_mutex_t bli_pthread_mutex_t;
|
||||
typedef pthread_mutexattr_t bli_pthread_mutexattr_t;
|
||||
typedef pthread_barrier_t bli_pthread_barrier_t;
|
||||
typedef pthread_barrierattr_t bli_pthread_barrierattr_t;
|
||||
typedef pthread_once_t bli_pthread_once_t;
|
||||
|
||||
// -- pthread_create(), pthread_join() -----------------------------------------
|
||||
|
||||
int bli_pthread_create
|
||||
(
|
||||
bli_pthread_t* thread,
|
||||
const bli_pthread_attr_t* attr,
|
||||
void* (*start_routine)(void*),
|
||||
void* arg
|
||||
);
|
||||
|
||||
int bli_pthread_join
|
||||
(
|
||||
bli_pthread_t thread,
|
||||
void** retval
|
||||
);
|
||||
|
||||
// -- pthread_mutex_*() --------------------------------------------------------
|
||||
|
||||
int bli_pthread_mutex_init
|
||||
(
|
||||
bli_pthread_mutex_t* mutex,
|
||||
const bli_pthread_mutexattr_t* attr
|
||||
);
|
||||
|
||||
int bli_pthread_mutex_destroy
|
||||
(
|
||||
bli_pthread_mutex_t* mutex
|
||||
);
|
||||
|
||||
int bli_pthread_mutex_lock
|
||||
(
|
||||
bli_pthread_mutex_t* mutex
|
||||
);
|
||||
|
||||
int bli_pthread_mutex_unlock
|
||||
(
|
||||
bli_pthread_mutex_t* mutex
|
||||
);
|
||||
|
||||
// -- pthread_barrier_*() ------------------------------------------------------
|
||||
|
||||
int bli_pthread_barrier_init
|
||||
(
|
||||
bli_pthread_barrier_t* barrier,
|
||||
const bli_pthread_barrierattr_t* attr,
|
||||
unsigned int count
|
||||
);
|
||||
|
||||
int bli_pthread_barrier_destroy
|
||||
(
|
||||
bli_pthread_barrier_t* barrier
|
||||
);
|
||||
|
||||
int bli_pthread_barrier_wait
|
||||
(
|
||||
bli_pthread_barrier_t* barrier
|
||||
);
|
||||
|
||||
// -- pthread_once_*() ---------------------------------------------------------
|
||||
|
||||
void bli_pthread_once
|
||||
(
|
||||
bli_pthread_once_t* once,
|
||||
void (*init)(void)
|
||||
);
|
||||
@@ -38,8 +38,6 @@
|
||||
// Define thrcomm_t for situations when POSIX multithreading is enabled.
|
||||
#ifdef BLIS_ENABLE_PTHREADS
|
||||
|
||||
#include <pthread.h>
|
||||
|
||||
#ifdef BLIS_USE_PTHREAD_BARRIER
|
||||
struct thrcomm_s
|
||||
{
|
||||
|
||||
@@ -39,6 +39,27 @@
|
||||
|
||||
int main( int argc, char** argv )
|
||||
{
|
||||
obj_t kappa, d;
|
||||
num_t dtd = BLIS_DCOMPLEX;
|
||||
|
||||
bli_obj_create( dtd, 1, 1, 0, 0, &kappa );
|
||||
|
||||
bli_setsc( (2.0/1.0), -0.5, &kappa );
|
||||
|
||||
bli_obj_create( dtd, 5, 3, 0, 0, &d );
|
||||
bli_randm( &d );
|
||||
|
||||
bli_printm( "d", &d, "%7.3f", "" );
|
||||
|
||||
bli_shiftd( &kappa, &d );
|
||||
|
||||
bli_printm( "d after", &d, "%7.3f", "" );
|
||||
|
||||
bli_obj_free( &kappa );
|
||||
bli_obj_free( &d );
|
||||
|
||||
return 0;
|
||||
|
||||
obj_t a, b, c;
|
||||
obj_t c_save;
|
||||
obj_t alpha, beta;
|
||||
|
||||
@@ -41,7 +41,7 @@
|
||||
|
||||
int main( int argc, char** argv )
|
||||
{
|
||||
obj_t a, c, d;
|
||||
obj_t a, c;
|
||||
obj_t c_save;
|
||||
obj_t alpha;
|
||||
dim_t m, n;
|
||||
@@ -163,11 +163,6 @@ int main( int argc, char** argv )
|
||||
//bli_obj_create( dt, m, n, n, 1, &c );
|
||||
bli_obj_create( dt, m, n, 0, 0, &c_save );
|
||||
|
||||
if ( bli_does_trans( side ) )
|
||||
bli_obj_create( dt, m, m, 0, 0, &d );
|
||||
else
|
||||
bli_obj_create( dt, n, n, 0, 0, &d );
|
||||
|
||||
bli_randm( &a );
|
||||
bli_randm( &c );
|
||||
|
||||
@@ -179,8 +174,8 @@ int main( int argc, char** argv )
|
||||
bli_randm( &a );
|
||||
bli_mktrim( &a );
|
||||
|
||||
bli_setd( &BLIS_TWO, &d );
|
||||
bli_addd( &d, &a );
|
||||
// Load the diagonal of A to make it more likely to be invertible.
|
||||
bli_shiftd( &BLIS_TWO, &a );
|
||||
|
||||
bli_setsc( (2.0/1.0), 0.0, &alpha );
|
||||
|
||||
@@ -328,7 +323,6 @@ int main( int argc, char** argv )
|
||||
bli_obj_free( &a );
|
||||
bli_obj_free( &c );
|
||||
bli_obj_free( &c_save );
|
||||
bli_obj_free( &d );
|
||||
}
|
||||
|
||||
//bli_finalize();
|
||||
|
||||
@@ -144,13 +144,14 @@ int main( int argc, char** argv )
|
||||
bli_obj_set_conjtrans( transa, &a );
|
||||
bli_obj_set_diag( diaga, &a );
|
||||
|
||||
// Randomize A, make it densely Hermitian, and zero the unstored
|
||||
// triangle to ensure the implementation reads only from the stored
|
||||
// region.
|
||||
// Randomize A and zero the unstored triangle to ensure the
|
||||
// implementation reads only from the stored region.
|
||||
bli_randm( &a );
|
||||
bli_mkherm( &a );
|
||||
bli_mktrim( &a );
|
||||
|
||||
// Load the diagonal of A to make it more likely to be invertible.
|
||||
bli_shiftd( &BLIS_TWO, &a );
|
||||
|
||||
bli_setsc( (2.0/1.0), 1.0, &alpha );
|
||||
|
||||
|
||||
|
||||
@@ -114,6 +114,14 @@ int main( int argc, char** argv )
|
||||
bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a );
|
||||
bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a );
|
||||
|
||||
// Randomize A and zero the unstored triangle to ensure the
|
||||
// implementation reads only from the stored region.
|
||||
bli_randm( &a );
|
||||
bli_mktrim( &a );
|
||||
|
||||
// Load the diagonal of A to make it more likely to be invertible.
|
||||
bli_shiftd( &BLIS_TWO, &a );
|
||||
|
||||
bli_setsc( (1.0/1.0), 0.0, &alpha );
|
||||
|
||||
|
||||
|
||||
@@ -1,70 +0,0 @@
|
||||
//#ifdef __APPLE__
|
||||
//#if !defined(_POSIX_BARRIERS) || (_POSIX_BARRIERS < 0)
|
||||
#if !defined(_POSIX_BARRIERS) || (_POSIX_BARRIERS != 200809L)
|
||||
|
||||
#ifndef PTHREAD_BARRIER_H_
|
||||
#define PTHREAD_BARRIER_H_
|
||||
|
||||
#include <pthread.h>
|
||||
#include <errno.h>
|
||||
|
||||
// Minimal emulation of POSIX barriers for platforms whose <pthread.h> does
// not provide them. It is built on a mutex and a condition variable.

// Barrier attributes are not supported by this emulation; the type exists
// only so that the POSIX signatures can be reproduced.
typedef int pthread_barrierattr_t;

typedef struct
{
    pthread_mutex_t mutex;      // Guards count and generation.
    pthread_cond_t  cond;       // Broadcast each time the barrier trips.
    unsigned int    count;      // Threads that have arrived in this cycle.
    unsigned int    tripCount;  // Arrivals required to release the barrier.
    unsigned int    generation; // Incremented once per completed cycle.
} pthread_barrier_t;

// Initialize barrier so that it releases once count threads have called
// pthread_barrier_wait(). The attr argument is ignored.
//
// Returns 0 on success. On failure returns -1 with errno set: EINVAL when
// count is zero, otherwise the error number reported by the failing
// pthread_mutex_init() or pthread_cond_init() call.
//
// The functions are static inline because a plain inline definition in a
// header provides no external definition in C99/C11, which leaves any call
// the compiler chooses not to inline unresolved at link time.
static inline int pthread_barrier_init(pthread_barrier_t *barrier, const pthread_barrierattr_t *attr, unsigned int count)
{
    int err;

    (void)attr;

    if (count == 0)
    {
        errno = EINVAL;
        return -1;
    }

    // The pthread_*_init() functions report failure by returning a positive
    // error number, so the result must be compared against zero.
    err = pthread_mutex_init(&barrier->mutex, NULL);
    if (err != 0)
    {
        errno = err;
        return -1;
    }

    err = pthread_cond_init(&barrier->cond, NULL);
    if (err != 0)
    {
        pthread_mutex_destroy(&barrier->mutex);
        errno = err;
        return -1;
    }

    barrier->tripCount  = count;
    barrier->count      = 0;
    barrier->generation = 0;

    return 0;
}

// Release the resources held by barrier. Always returns 0; the results of
// the underlying destroy calls are intentionally not propagated.
static inline int pthread_barrier_destroy(pthread_barrier_t *barrier)
{
    pthread_cond_destroy(&barrier->cond);
    pthread_mutex_destroy(&barrier->mutex);
    return 0;
}

// Block until tripCount threads have called this function on barrier.
//
// Returns 1 in the single thread whose arrival trips the barrier and 0 in
// every other thread. (POSIX uses PTHREAD_BARRIER_SERIAL_THREAD for the
// former; that macro may not exist on platforms that need this emulation,
// so the literal value is kept.)
static inline int pthread_barrier_wait(pthread_barrier_t *barrier)
{
    int is_serial = 0;

    pthread_mutex_lock(&barrier->mutex);

    ++(barrier->count);
    if (barrier->count >= barrier->tripCount)
    {
        // Last arrival: reset for the next cycle and release the waiters.
        barrier->count = 0;
        ++(barrier->generation);
        pthread_cond_broadcast(&barrier->cond);
        is_serial = 1;
    }
    else
    {
        // Condition variables may wake spuriously, so wait until the
        // generation observed on arrival has actually been retired. Testing
        // the generation rather than count also stays correct when a fast
        // thread re-enters the barrier before a slow waiter has woken up.
        const unsigned int arrival_generation = barrier->generation;

        do
        {
            pthread_cond_wait(&barrier->cond, &barrier->mutex);
        }
        while (arrival_generation == barrier->generation);
    }

    pthread_mutex_unlock(&barrier->mutex);

    return is_serial;
}
|
||||
|
||||
#endif // PTHREAD_BARRIER_H_
|
||||
#endif // _POSIX_BARRIERS
|
||||
@@ -126,20 +126,20 @@ void libblis_test_thread_decorator( test_params_t* params, test_ops_t* ops )
|
||||
|
||||
// Allocate an array of pthread objects and auxiliary data structs to pass
|
||||
// to the thread entry functions.
|
||||
pthread_t* pthread = bli_malloc_intl( sizeof( pthread_t ) * nt );
|
||||
thread_data_t* tdata = bli_malloc_intl( sizeof( thread_data_t ) * nt );
|
||||
bli_pthread_t* pthread = bli_malloc_intl( sizeof( bli_pthread_t ) * nt );
|
||||
thread_data_t* tdata = bli_malloc_intl( sizeof( thread_data_t ) * nt );
|
||||
|
||||
// Allocate a mutex for the threads to share.
|
||||
//pthread_mutex_t* mutex = bli_malloc_intl( sizeof( pthread_mutex_t ) );
|
||||
//bli_pthread_mutex_t* mutex = bli_malloc_intl( sizeof( bli_pthread_mutex_t ) );
|
||||
|
||||
// Allocate a barrier for the threads to share.
|
||||
pthread_barrier_t* barrier = bli_malloc_intl( sizeof( pthread_barrier_t ) );
|
||||
bli_pthread_barrier_t* barrier = bli_malloc_intl( sizeof( bli_pthread_barrier_t ) );
|
||||
|
||||
// Initialize the mutex.
|
||||
//pthread_mutex_init( mutex, NULL );
|
||||
//bli_pthread_mutex_init( mutex, NULL );
|
||||
|
||||
// Initialize the barrier for nt threads.
|
||||
pthread_barrier_init( barrier, NULL, nt );
|
||||
bli_pthread_barrier_init( barrier, NULL, nt );
|
||||
|
||||
// NOTE: We must iterate backwards so that the chief thread (thread id 0)
|
||||
// can spawn all other threads before proceeding with its own computation.
|
||||
@@ -157,7 +157,7 @@ void libblis_test_thread_decorator( test_params_t* params, test_ops_t* ops )
|
||||
|
||||
// Spawn additional threads for ids greater than 0.
|
||||
if ( id != 0 )
|
||||
pthread_create( &pthread[id], NULL, libblis_test_thread_entry, &tdata[id] );
|
||||
bli_pthread_create( &pthread[id], NULL, libblis_test_thread_entry, &tdata[id] );
|
||||
else
|
||||
libblis_test_thread_entry( ( void* )(&tdata[0]) );
|
||||
}
|
||||
@@ -165,14 +165,14 @@ void libblis_test_thread_decorator( test_params_t* params, test_ops_t* ops )
|
||||
// Thread 0 waits for additional threads to finish.
|
||||
for ( unsigned int id = 1; id < nt; id++ )
|
||||
{
|
||||
pthread_join( pthread[id], NULL );
|
||||
bli_pthread_join( pthread[id], NULL );
|
||||
}
|
||||
|
||||
// Destroy the mutex.
|
||||
//pthread_mutex_destroy( mutex );
|
||||
//bli_pthread_mutex_destroy( mutex );
|
||||
|
||||
// Destroy the barrier.
|
||||
pthread_barrier_destroy( barrier );
|
||||
bli_pthread_barrier_destroy( barrier );
|
||||
|
||||
// Free the pthread-related memory.
|
||||
bli_free_intl( pthread );
|
||||
@@ -2267,9 +2267,9 @@ void libblis_test_op_driver
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Wait for all other threads so that the output stays organized.
|
||||
pthread_barrier_wait( tdata->barrier );
|
||||
bli_pthread_barrier_wait( tdata->barrier );
|
||||
|
||||
// These statements should only be executed by one thread.
|
||||
if ( tdata->id == 0 )
|
||||
@@ -2694,21 +2694,10 @@ void libblis_test_ceil_pow2( obj_t* alpha )
|
||||
|
||||
void libblis_test_mobj_load_diag( test_params_t* params, obj_t* a )
|
||||
{
|
||||
num_t dt = bli_obj_dt( a );
|
||||
dim_t m = bli_obj_length( a );
|
||||
dim_t n = bli_obj_width( a );
|
||||
|
||||
obj_t d;
|
||||
|
||||
// We assume that all elements of a were initialized on the interval [-1,1].
|
||||
|
||||
bli_obj_create( dt, m, n, 0, 0, &d );
|
||||
|
||||
// Initialize the diagonal of d to 2.0 and then add the diagonal of a.
|
||||
bli_setd( &BLIS_TWO, &d );
|
||||
bli_addd( &d, a );
|
||||
|
||||
bli_obj_free( &d );
|
||||
// Load the diagonal by 2.0.
|
||||
bli_shiftd( &BLIS_TWO, a );
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -52,14 +52,6 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
// For pthreads API.
|
||||
#include <pthread.h>
|
||||
//#ifdef __APPLE__
|
||||
//#if !defined(_POSIX_BARRIERS) || (_POSIX_BARRIERS < 0)
|
||||
#if !defined(_POSIX_BARRIERS) || (_POSIX_BARRIERS != 200809L)
|
||||
#include "pthread_barrier.h"
|
||||
#endif
|
||||
|
||||
//
|
||||
// --- Constants and types -----------------------------------------------------
|
||||
//
|
||||
@@ -303,13 +295,13 @@ typedef struct
|
||||
|
||||
typedef struct thread_data
|
||||
{
|
||||
test_params_t* params;
|
||||
test_ops_t* ops;
|
||||
unsigned int nt;
|
||||
unsigned int id;
|
||||
unsigned int xc;
|
||||
//pthread_mutex_t* mutex;
|
||||
pthread_barrier_t* barrier;
|
||||
test_params_t* params;
|
||||
test_ops_t* ops;
|
||||
unsigned int nt;
|
||||
unsigned int id;
|
||||
unsigned int xc;
|
||||
//bli_pthread_mutex_t* mutex;
|
||||
bli_pthread_barrier_t* barrier;
|
||||
} thread_data_t;
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user