Merge branch 'master' into dev

This commit is contained in:
Field G. Van Zee
2018-10-19 18:41:10 -05:00
39 changed files with 1660 additions and 670 deletions

View File

@@ -10,7 +10,7 @@ install:
- call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
- conda update --yes --quiet conda
- conda config --add channels conda-forge
- conda install --yes clangdev posix m2-make pthreads-win32
- conda install --yes clangdev posix m2-make
- call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64
- set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%"
- set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%"
@@ -22,10 +22,10 @@ build_script:
- set RANLIB=echo
- set AR=llvm-ar
- set AS=llvm-as
- set LIBPTHREAD=-lpthreads
- set LIBPTHREAD=
- set "PATH=%PATH%;C:\projects\blis\lib\%CONFIG%"
- set "CFLAGS=-Wno-macro-redefined"
- bash -lc "source activate && cd /c/projects/blis && ./configure %CONFIGURE_OPTS% --enable-cblas --enable-threading=pthreads --enable-arg-max-hack --prefix=/c/blis %CONFIG%"
- bash -lc "source activate && cd /c/projects/blis && ./configure %CONFIGURE_OPTS% --enable-cblas --disable-threading --enable-arg-max-hack --prefix=/c/blis %CONFIG%"
- bash -lc "source activate && cd /c/projects/blis && make -j4 V=1"
- bash -lc "source activate && cd /c/projects/blis && make install"
- ps: Compress-Archive -Path C:\blis -DestinationPath C:\blis.zip

View File

@@ -33,11 +33,7 @@
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
#include "bli_system.h"
#include "bli_type_defs.h"
#include "bli_arch.h"
#include "bli_cpuid.h"

85
build/regen-symbols.sh Executable file
View File

@@ -0,0 +1,85 @@
#!/bin/sh
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2018, The University of Texas at Austin
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name of copyright holder(s) nor the names
# of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
#
# This script regenerates a list of symbols for use when building
# Windows-compatible DLLs. We assume that this script will be run after
# running configure as:
#
# ./configure --enable-cblas haswell
#
# and compiling BLIS normally. (Notice that we also prune out all
# haswell/zen-related context initialization and reference kernels.)
#
# Both paths are relative, so this script must be run from the top-level
# directory of the source distribution (the one containing lib/ and build/).
libblis='lib/haswell/libblis.so'
symfile='build/libblis-symbols.def'

# Refuse to continue if the shared library has not been built. Without this
# check every nm invocation below would fail and ${symfile} would be
# silently overwritten with a file containing nothing but "EXPORTS".
if [ ! -f "${libblis}" ]; then
    echo "$0: cannot find '${libblis}'; configure and build BLIS before running this script." >&2
    exit 1
fi

# The symbol list starts with the EXPORTS keyword.
echo "EXPORTS" > def.exports

# Collect the global text-section (" T ") symbols of each family into its
# own temporary file. 'cut -f2- -dT' drops everything up to and including
# the first 'T' (the address column and the type letter), leaving a single
# leading space in front of each symbol name.
#nm -g ${libblis} | grep -o " D BLIS_.*" | cut -f2- "-dD" > def.blis_const
nm -g ${libblis} | grep -o " T bli_.*" | cut -f2- "-dT" > def.blis
nm -g ${libblis} | grep -o " T bla_.*" | cut -f2- "-dT" > def.blis_bla
nm -g ${libblis} | grep -o " T cblas_.*" | cut -f2- "-dT" > def.blis_cblas
nm -g ${libblis} | grep -o " T s[acdgnrst].*" | cut -f2- "-dT" > def.blas_s
nm -g ${libblis} | grep -o " T d[acdgnrstz].*" | cut -f2- "-dT" > def.blas_d
nm -g ${libblis} | grep -o " T c[acdghrst].*" | cut -f2- "-dT" > def.blas_c
nm -g ${libblis} | grep -o " T z[acdghrst].*" | cut -f2- "-dT" > def.blas_z
nm -g ${libblis} | grep -o " T i[cdsz].*" | cut -f2- "-dT" > def.blas_i

# Concatenate the pieces, strip the leading space left behind by the first
# cut, and prune the haswell/zen-specific context initialization and
# reference kernel symbols before writing the final symbol file.
cat def.exports \
    def.blis \
    def.blis_bla \
    def.blas_s \
    def.blas_d \
    def.blas_c \
    def.blas_z \
    def.blas_i \
    def.blis_cblas \
    | cut -f2- "-d " \
    | grep -v init_haswell \
    | grep -v haswell_ref \
    | grep -v zen_ref \
    > ${symfile}

# Remove the temporary per-family files.
rm -f \
    def.exports \
    def.blis \
    def.blis_bla \
    def.blas_s \
    def.blas_d \
    def.blas_c \
    def.blas_z \
    def.blas_i \
    def.blis_cblas

View File

@@ -497,9 +497,9 @@ SOFLAGS := -shared
ifeq ($(IS_WIN),yes)
# Windows shared library link flags.
ifeq ($(CC_VENDOR),clang)
SOFLAGS += -Wl,-def:windows/build/libblis-symbols.def -Wl,-implib:$(BASE_LIB_PATH)/$(LIBBLIS).lib
SOFLAGS += -Wl,-def:build/libblis-symbols.def -Wl,-implib:$(BASE_LIB_PATH)/$(LIBBLIS).lib
else
SOFLAGS += windows/build/libblis-symbols.def -Wl,--out-implib,$(LIBBLIS).dll.a
SOFLAGS += build/libblis-symbols.def -Wl,--out-implib,$(LIBBLIS).dll.a
endif
else
# Linux shared library link flags.

547
configure vendored
View File

@@ -266,6 +266,8 @@ print_usage()
echo " AR Specifies the archiver to use."
echo " CFLAGS Specifies additional compiler flags to use (prepended)."
echo " LDFLAGS Specifies additional linker flags to use (prepended)."
echo " LIBPTHREAD Pthreads library to use."
echo " PYTHON Specifies the python interpreter to use."
echo " "
echo " Environment variables may also be specified as command line"
echo " options, e.g.:"
@@ -901,32 +903,32 @@ get_cxx_search_list()
echo "${list}"
}
select_cc()
select_tool()
{
local search_list CC_env the_cc cc
# This is the list of compilers to search for, and the order in which
# to search for them.
# This is the list of compilers/tools to search for, and the order in
# which to search for them.
search_list=$1
# The environment variable associated with the compiler type we
# are searching (e.g. CC, CXX).
# The environment variable associated with the compiler/tool type we
# are searching (e.g. CC, CXX, PYTHON).
CC_env=$2
# If CC contains something, add it to the beginning of our default
# If CC_env contains something, add it to the beginning of our default
# search list.
if [ -n "${CC_env}" ]; then
search_list="${CC_env} ${search_list}"
fi
# Initialize our selected compiler to empty.
# Initialize our selected compiler/tool to empty.
the_cc=""
# Try each compiler in the list and select the first one we find that
# Try each compiler/tool in the list and select the first one we find that
# works.
for cc in ${search_list}; do
# See if the current compiler works and/or is present.
# See if the current compiler/tool works and/or is present.
${cc} --version > /dev/null 2>&1
if [ "$?" == 0 ]; then
@@ -935,7 +937,7 @@ select_cc()
fi
done
# Return the selected compiler.
# Return the selected compiler/tool.
echo "${the_cc}"
}
@@ -995,7 +997,9 @@ auto_detect()
# Set the linker flags. We need pthreads because it is needed for
# parts of bli_arch.c unrelated to bli_arch_string(), which is called
# by the main() function in ${main_c}.
ldflags="${LIBPTHREAD:--lpthread}"
if [ $is_win = no ]; then
ldflags="${LIBPTHREAD--lpthread}"
fi
# Compile the auto-detect program using source code inside the
# framework.
@@ -1105,6 +1109,12 @@ echoerr_unsupportedcc()
exit 1
}
# Reports, via echoerr, that the python version held in the global
# ${python_version} is unsupported, then terminates the script with exit
# status 1. Takes no arguments.
echoerr_unsupportedpython()
{
echoerr "${script_name}: *** Unsupported python version: ${python_version}."
exit 1
}
get_binutils_version()
{
binutil=${AS:-as}
@@ -1137,6 +1147,76 @@ get_binutils_version()
echo "${script_name}: found assembler ('as') version ${bu_version} (maj: ${bu_major}, min: ${bu_minor}, rev: ${bu_revision})."
}
get_python_search_list()
{
	# Print the candidate interpreter names in the order they should be
	# tried. The unversioned 'python' comes first so that it is preferred
	# on Linux, Darwin (OS X), and generic OSes alike.
	echo "python python3 python2"
}
get_python_version()
{
	local raw_version

	# Ask the interpreter selected earlier (${found_python}) to identify
	# itself. The reply contains extra text such as "Python ", and some
	# interpreters print it on stderr rather than stdout, so both streams
	# are captured together.
	raw_version="$(${found_python} --version 2>&1)"

	# Strip the leading words so that only the first number and whatever
	# follows it remain.
	python_version=$(echo "${raw_version}" | sed -e "s/[a-zA-Z_ ]* \([0-9]*\..*\)/\1/g")

	# Split the dotted version string into its three components. These are
	# deliberately left as globals so that later checks can read them.
	python_major=$(echo "${python_version}" | cut -d. -f1)
	python_minor=$(echo "${python_version}" | cut -d. -f2)
	python_revision=$(echo "${python_version}" | cut -d. -f3)

	echo "${script_name}: found python version ${python_version} (maj: ${python_major}, min: ${python_minor}, rev: ${python_revision})."
}
check_python()
{
	local python

	# NOTE(review): this local is assigned but never read in this function.
	python="${found_python}"

	#
	# Supported interpreters, by major version:
	#
	#   python 1.x: none
	#   python 2.x: 2.7 and later
	#   python 3.x: 3.5 and later
	#
	# The checks read the globals ${python_major} and ${python_minor}.
	#

	# Every 1.x release is rejected.
	if [ ${python_major} -eq 1 ]; then
		echoerr_unsupportedpython
	fi

	# 2.6.x and older are rejected.
	if [ ${python_major} -eq 2 ] && [ ${python_minor} -lt 7 ]; then
		echoerr_unsupportedpython
	fi

	# 3.4.x and older are rejected.
	if [ ${python_major} -eq 3 ] && [ ${python_minor} -lt 5 ]; then
		echoerr_unsupportedpython
	fi

	echo "${script_name}: python ${python_version} appears to be supported."
}
get_compiler_version()
{
local cc vendor_string
@@ -1676,224 +1756,254 @@ main()
# -- Command line option/argument parsing ----------------------------------
# Process our command line options.
while getopts ":hp:d:s:t:r:qci:b:-:" opt; do
case $opt in
-)
case "$OPTARG" in
help)
print_usage
;;
quiet)
quiet_flag=1
;;
prefix=*)
prefix_flag=1
install_prefix_user=${OPTARG#*=}
;;
libdir=*)
libdir_flag=1
install_libdir_user=${OPTARG#*=}
;;
includedir=*)
incdir_flag=1
install_incdir_user=${OPTARG#*=}
;;
sharedir=*)
sharedir_flag=1
install_sharedir_user=${OPTARG#*=}
;;
enable-debug)
debug_flag=1
debug_type=noopt
;;
enable-debug=*)
debug_flag=1
debug_type=${OPTARG#*=}
;;
disable-debug)
debug_flag=0
;;
enable-verbose-make)
enable_verbose='yes'
;;
disable-verbose-make)
enable_verbose='no'
;;
enable-arg-max-hack)
enable_arg_max_hack='yes'
;;
disable-arg-max-hack)
enable_arg_max_hack='no'
;;
enable-static)
enable_static='yes'
;;
disable-static)
enable_static='no'
;;
enable-shared)
enable_shared='yes'
;;
disable-shared)
enable_shared='no'
;;
enable-threading=*)
threading_model=${OPTARG#*=}
;;
thread-part-jrir=*)
thread_part_jrir=${OPTARG#*=}
;;
disable-threading)
threading_model='no'
;;
enable-packbuf-pools)
enable_packbuf_pools='yes'
;;
disable-packbuf-pools)
enable_packbuf_pools='no'
;;
enable-sandbox=*)
sandbox_flag=1
sandbox=${OPTARG#*=}
;;
disable-sandbox)
sandbox_flag=0
;;
int-size=*)
int_type_size=${OPTARG#*=}
;;
blas-int-size=*)
blas_int_type_size=${OPTARG#*=}
;;
enable-blas)
enable_blas='yes'
;;
disable-blas)
enable_blas='no'
;;
enable-cblas)
enable_cblas='yes'
;;
disable-cblas)
enable_cblas='no'
;;
enable-mixed-dt)
enable_mixed_dt='yes'
;;
disable-mixed-dt)
enable_mixed_dt='no'
;;
enable-mixed-dt-extra-mem)
enable_mixed_dt_extra_mem='yes'
;;
disable-mixed-dt-extra-mem)
enable_mixed_dt_extra_mem='no'
;;
with-memkind)
enable_memkind='yes'
;;
without-memkind)
enable_memkind='no'
;;
force-version=*)
force_version=${OPTARG#*=}
;;
show-config-list)
show_config_list=1
;;
*)
print_usage
;;
esac;;
h)
print_usage
;;
p)
prefix_flag=1
install_prefix_user=$OPTARG
;;
d)
debug_flag=1
debug_type=$OPTARG
;;
s)
sandbox_flag=1
sandbox=$OPTARG
;;
q)
quiet_flag=1
;;
t)
threading_model=$OPTARG
;;
r)
thread_part_jrir=$OPTARG
;;
i)
int_type_size=$OPTARG
;;
b)
blas_int_type_size=$OPTARG
;;
c)
show_config_list=1
;;
\?)
print_usage
;;
esac
done
shift $(($OPTIND - 1))
found=true
while $found = true; do
# Parse environment variables
while [ $# -gt 0 ]; do
case $1 in
CC=*)
CC=${1#*=}
shift
;;
RANLIB=*)
RANLIB=${1#*=}
shift
;;
AR=*)
AR=${1#*=}
shift
;;
*=*)
print_usage
;;
*)
break
;;
esac
# Process our command line options.
unset OPTIND
while getopts ":hp:d:s:t:r:qci:b:-:" opt; do
case $opt in
-)
case "$OPTARG" in
help)
print_usage
;;
quiet)
quiet_flag=1
;;
prefix=*)
prefix_flag=1
install_prefix_user=${OPTARG#*=}
;;
libdir=*)
libdir_flag=1
install_libdir_user=${OPTARG#*=}
;;
includedir=*)
incdir_flag=1
install_incdir_user=${OPTARG#*=}
;;
sharedir=*)
sharedir_flag=1
install_sharedir_user=${OPTARG#*=}
;;
enable-debug)
debug_flag=1
debug_type=noopt
;;
enable-debug=*)
debug_flag=1
debug_type=${OPTARG#*=}
;;
disable-debug)
debug_flag=0
;;
enable-verbose-make)
enable_verbose='yes'
;;
disable-verbose-make)
enable_verbose='no'
;;
enable-arg-max-hack)
enable_arg_max_hack='yes'
;;
disable-arg-max-hack)
enable_arg_max_hack='no'
;;
enable-static)
enable_static='yes'
;;
disable-static)
enable_static='no'
;;
enable-shared)
enable_shared='yes'
;;
disable-shared)
enable_shared='no'
;;
enable-threading=*)
threading_model=${OPTARG#*=}
;;
thread-part-jrir=*)
thread_part_jrir=${OPTARG#*=}
;;
disable-threading)
threading_model='no'
;;
enable-packbuf-pools)
enable_packbuf_pools='yes'
;;
disable-packbuf-pools)
enable_packbuf_pools='no'
;;
enable-sandbox=*)
sandbox_flag=1
sandbox=${OPTARG#*=}
;;
disable-sandbox)
sandbox_flag=0
;;
int-size=*)
int_type_size=${OPTARG#*=}
;;
blas-int-size=*)
blas_int_type_size=${OPTARG#*=}
;;
enable-blas)
enable_blas='yes'
;;
disable-blas)
enable_blas='no'
;;
enable-cblas)
enable_cblas='yes'
;;
disable-cblas)
enable_cblas='no'
;;
enable-mixed-dt)
enable_mixed_dt='yes'
;;
disable-mixed-dt)
enable_mixed_dt='no'
;;
enable-mixed-dt-extra-mem)
enable_mixed_dt_extra_mem='yes'
;;
disable-mixed-dt-extra-mem)
enable_mixed_dt_extra_mem='no'
;;
with-memkind)
enable_memkind='yes'
;;
without-memkind)
enable_memkind='no'
;;
force-version=*)
force_version=${OPTARG#*=}
;;
show-config-list)
show_config_list=1
;;
*)
print_usage
;;
esac;;
h)
print_usage
;;
p)
prefix_flag=1
install_prefix_user=$OPTARG
;;
d)
debug_flag=1
debug_type=$OPTARG
;;
s)
sandbox_flag=1
sandbox=$OPTARG
;;
q)
quiet_flag=1
;;
t)
threading_model=$OPTARG
;;
r)
thread_part_jrir=$OPTARG
;;
i)
int_type_size=$OPTARG
;;
b)
blas_int_type_size=$OPTARG
;;
c)
show_config_list=1
;;
\?)
print_usage
;;
esac
done
shift $(($OPTIND - 1))
# Parse environment variables
found=false
while [ $# -gt 0 ]; do
case $1 in
*=*)
var=`expr "$1" : '\([^=]*\)='`
value=`expr "$1" : '[^=]*=\(.*\)'`
eval $var=\$value
export $var
shift
found=true
;;
*)
break
;;
esac
done
done
# -- Check the operating system --------------------------------------------
os_name=$(uname -s)
os_vers=$(uname -r)
echo "${script_name}: detected ${os_name} kernel version ${os_vers}."
# Define a single variable off of which we can branch to tell if we are
# building for Windows.
is_win=no
if [[ $os_name == MSYS* ]] || [[ $os_name == MINGW* ]] || [[ $os_name == CYGWIN* ]] ;
then
is_win=yes
fi
# Define a single variable off of which we can branch to tell if we are
# building for Windows.
is_win=no
if [[ $os_name == MSYS* ]] || \
[[ $os_name == MINGW* ]] || \
[[ $os_name == CYGWIN* ]] ; then
is_win=yes
fi
# -- Find a python interpreter ---------------------------------------------
# Acquire the python search order. This may vary based on the os found
# above.
python_search_list=$(get_python_search_list)
echo "${script_name}: python interpreter search list is: ${python_search_list}."
# Find a working python interpreter. If the PYTHON environment variable is
# set, its value is tried before the entries of the default search list.
found_python=$(select_tool "${python_search_list}" "${PYTHON}")
# If we didn't find any working python interpreters, we print an error
# message and abort, since the version checks that follow need one.
if [ -z "${found_python}" ]; then
	echo "${script_name}: *** Could not find working python interpreter! Cannot continue."
	exit 1
fi
echo "${script_name}: using '${found_python}' python interpreter."
# -- Check the python version ----------------------------------------------
# Check the python interpreter's version.
get_python_version
check_python
# -- Find a C compiler -----------------------------------------------------
# Acquire the compiler search order. This will vary based on the os
# found above.
# Acquire the compiler search order. This will vary based on the os found
# above.
cc_search_list=$(get_cc_search_list)
echo "${script_name}: C compiler search list is: ${cc_search_list}."
# Find a working C compiler.
found_cc=$(select_cc "${cc_search_list}" "${CC}")
found_cc=$(select_tool "${cc_search_list}" "${CC}")
# If we didn't find any working C compilers, we print an error message.
if [ -z "${found_cc}" ]; then
@@ -1912,9 +2022,9 @@ main()
echo "${script_name}: C++ compiler search list is: ${cxx_search_list}."
# Find a working C++ compiler. NOTE: We can reuse the select_cc()
# Find a working C++ compiler. NOTE: We can reuse the select_tool()
# function since it is written in a way that is general-purpose.
found_cxx=$(select_cc "${cxx_search_list}" "${CXX}")
found_cxx=$(select_tool "${cxx_search_list}" "${CXX}")
# If we didn't find any working C++ compilers, we print an error message.
if [ -z "${found_cxx}" ]; then
@@ -2558,10 +2668,17 @@ main()
ranlib_esc=$(echo "${RANLIB:-ranlib}" | sed 's/\//\\\//g')
# For AR, if the variable is not set, we use a default value of 'ar'.
ar_esc=$(echo "${AR:-ar}" | sed 's/\//\\\//g')
libpthread_esc=$(echo "${LIBPTHREAD:--lpthread}" | sed 's/\//\\\//g')
libpthread_esc=$(echo "${LIBPTHREAD--lpthread}" | sed 's/\//\\\//g')
cflags_preset_esc=$(echo "${cflags_preset}" | sed 's/\//\\\//g')
ldflags_preset_esc=$(echo "${ldflags_preset}" | sed 's/\//\\\//g')
# For Windows builds, clear the libpthread_esc variable so that
# no pthreads library is substituted into config.mk. (Windows builds
# employ an implementation of pthreads that is internal to BLIS.)
if [ $is_win = yes ]; then
libpthread_esc=
fi
# Typically, there are no slashes in the version variable. However,
# downstream maintainers (such as those for Debian) may create custom
# tags in their local clones such as "upstream/0.4.1", which obviously

View File

@@ -597,7 +597,7 @@ Notes for interpreting function descriptions:
* **[Level-1v](BLISObjectAPI.md#level-1v-operations)**: Operations on vectors:
* [addv](BLISObjectAPI.md#addv), [amaxv](BLISObjectAPI.md#amaxv), [axpyv](BLISObjectAPI.md#axpyv), [axpbyv](BLISObjectAPI.md#axpbyv), [copyv](BLISObjectAPI.md#copyv), [dotv](BLISObjectAPI.md#dotv), [dotxv](BLISObjectAPI.md#dotxv), [invertv](BLISObjectAPI.md#invertv), [scal2v](BLISObjectAPI.md#scal2v), [scalv](BLISObjectAPI.md#scalv), [setv](BLISObjectAPI.md#setv), [setrv](BLISObjectAPI.md#setrv), [setiv](BLISObjectAPI.md#setiv), [subv](BLISObjectAPI.md#subv), [swapv](BLISObjectAPI.md#swapv), [xpbyv](BLISObjectAPI.md#xpbyv)
* **[Level-1d](BLISObjectAPI.md#level-1d-operations)**: Element-wise operations on matrix diagonals:
* [addd](BLISObjectAPI.md#addd), [axpyd](BLISObjectAPI.md#axpyd), [copyd](BLISObjectAPI.md#copyd), [invertd](BLISObjectAPI.md#invertd), [scald](BLISObjectAPI.md#scald), [scal2d](BLISObjectAPI.md#scal2d), [setd](BLISObjectAPI.md#setd), [setid](BLISObjectAPI.md#setid), [subd](BLISObjectAPI.md#subd)
* [addd](BLISObjectAPI.md#addd), [axpyd](BLISObjectAPI.md#axpyd), [copyd](BLISObjectAPI.md#copyd), [invertd](BLISObjectAPI.md#invertd), [scald](BLISObjectAPI.md#scald), [scal2d](BLISObjectAPI.md#scal2d), [setd](BLISObjectAPI.md#setd), [setid](BLISObjectAPI.md#setid), [shiftd](BLISObjectAPI.md#shiftd), [subd](BLISObjectAPI.md#subd), [xpbyd](BLISObjectAPI.md#xpbyd)
* **[Level-1m](BLISObjectAPI.md#level-1m-operations)**: Element-wise operations on matrices:
* [addm](BLISObjectAPI.md#addm), [axpym](BLISObjectAPI.md#axpym), [copym](BLISObjectAPI.md#copym), [scalm](BLISObjectAPI.md#scalm), [scal2m](BLISObjectAPI.md#scal2m), [setm](BLISObjectAPI.md#setm), [setrm](BLISObjectAPI.md#setrm), [setim](BLISObjectAPI.md#setim), [subm](BLISObjectAPI.md#subm)
* **[Level-1f](BLISObjectAPI.md#level-1f-operations)**: Fused operations on multiple vectors:
@@ -771,6 +771,8 @@ Perform
```
where `x` is a vector of length _n_, and `alpha` is a scalar.
Observed object properties: `conj?(alpha)`.
---
#### scal2v
@@ -788,6 +790,8 @@ Perform
```
where `x` and `y` are vectors of length _n_, and `alpha` is a scalar.
Observed object properties: `conj?(alpha)`, `conj?(x)`.
---
#### setv
@@ -804,6 +808,8 @@ Perform
```
That is, set all elements of an _n_-length vector `x` to scalar `conj?(alpha)`.
Observed object properties: `conj?(alpha)`.
---
#### setrv
@@ -856,6 +862,8 @@ Perform
```
where `x` and `y` are vectors of length _n_.
Observed object properties: `conj?(x)`.
---
#### swapv
@@ -885,6 +893,8 @@ Perform
```
where `x` and `y` are vectors of length _n_, and `beta` is a scalar.
Observed object properties: `conj?(beta)`, `conj?(x)`.
---
@@ -908,6 +918,8 @@ void bli_addd
);
```
Observed object properties: `diagoff(A)`, `diag(A)`, `trans?(A)`.
---
#### axpyd
@@ -920,6 +932,8 @@ void bli_axpyd
);
```
Observed object properties: `conj?(alpha)`, `diagoff(A)`, `diag(A)`, `trans?(A)`.
---
#### copyd
@@ -931,6 +945,8 @@ void bli_copyd
);
```
Observed object properties: `diagoff(A)`, `diag(A)`, `trans?(A)`.
---
#### invertd
@@ -941,6 +957,8 @@ void bli_invertd
);
```
Observed object properties: `diagoff(A)`.
---
#### scald
@@ -952,6 +970,8 @@ void bli_scald
);
```
Observed object properties: `conj?(alpha)`, `diagoff(A)`.
---
#### scal2d
@@ -964,6 +984,8 @@ void bli_scal2d
);
```
Observed object properties: `conj?(alpha)`, `diagoff(A)`, `diag(A)`, `trans?(A)`.
---
#### setd
@@ -975,6 +997,8 @@ void bli_setd
);
```
Observed object properties: `conj?(alpha)`, `diagoff(A)`, `diag(A)`.
---
#### setid
@@ -985,7 +1009,26 @@ void bli_setid
obj_t* a
);
```
Set the imaginary components of a matrix diagonal to a scalar `alpha`.
Set the imaginary components of every element along the diagonal of `a`
to a scalar `alpha`.
Note that the datatype of `alpha` must be the real projection of the datatype
of `a`.
Observed object properties: `diagoff(A)`.
---
#### shiftd
```c
void bli_shiftd
(
obj_t* alpha,
obj_t* a
);
```
Add a constant value `alpha` to every element along the diagonal of `a`.
Observed object properties: `diagoff(A)`.
---
@@ -998,6 +1041,22 @@ void bli_subd
);
```
Observed object properties: `diagoff(A)`, `diag(A)`, `trans?(A)`.
---
#### xpbyd
```c
void bli_xpbyd
(
obj_t* a,
obj_t* beta,
obj_t* b
);
```
Observed object properties: `conj?(beta)`, `diagoff(A)`, `diag(A)`, `trans?(A)`.
---

View File

@@ -195,7 +195,7 @@ Notes for interpreting function descriptions:
* **[Level-1v](BLISTypedAPI.md#level-1v-operations)**: Operations on vectors:
* [addv](BLISTypedAPI.md#addv), [amaxv](BLISTypedAPI.md#amaxv), [axpyv](BLISTypedAPI.md#axpyv), [axpbyv](BLISTypedAPI.md#axpbyv), [copyv](BLISTypedAPI.md#copyv), [dotv](BLISTypedAPI.md#dotv), [dotxv](BLISTypedAPI.md#dotxv), [invertv](BLISTypedAPI.md#invertv), [scal2v](BLISTypedAPI.md#scal2v), [scalv](BLISTypedAPI.md#scalv), [setv](BLISTypedAPI.md#setv), [subv](BLISTypedAPI.md#subv), [swapv](BLISTypedAPI.md#swapv), [xpbyv](BLISTypedAPI.md#xpbyv)
* **[Level-1d](BLISTypedAPI.md#level-1d-operations)**: Element-wise operations on matrix diagonals:
* [addd](BLISTypedAPI.md#addd), [axpyd](BLISTypedAPI.md#axpyd), [copyd](BLISTypedAPI.md#copyd), [invertd](BLISTypedAPI.md#invertd), [scald](BLISTypedAPI.md#scald), [scal2d](BLISTypedAPI.md#scal2d), [setd](BLISTypedAPI.md#setd), [setid](BLISTypedAPI.md#setid), [subd](BLISTypedAPI.md#subd)
* [addd](BLISTypedAPI.md#addd), [axpyd](BLISTypedAPI.md#axpyd), [copyd](BLISTypedAPI.md#copyd), [invertd](BLISTypedAPI.md#invertd), [scald](BLISTypedAPI.md#scald), [scal2d](BLISTypedAPI.md#scal2d), [setd](BLISTypedAPI.md#setd), [setid](BLISTypedAPI.md#setid), [shiftd](BLISTypedAPI.md#shiftd), [subd](BLISTypedAPI.md#subd), [xpbyd](BLISTypedAPI.md#xpbyd)
* **[Level-1m](BLISTypedAPI.md#level-1m-operations)**: Element-wise operations on matrices:
* [addm](BLISTypedAPI.md#addm), [axpym](BLISTypedAPI.md#axpym), [copym](BLISTypedAPI.md#copym), [scalm](BLISTypedAPI.md#scalm), [scal2m](BLISTypedAPI.md#scal2m), [setm](BLISTypedAPI.md#setm), [subm](BLISTypedAPI.md#subm)
* **[Level-1f](BLISTypedAPI.md#level-1f-operations)**: Fused operations on multiple vectors:
@@ -476,7 +476,7 @@ where `x` and `y` are vectors of length _n_, and `beta` is a scalar.
Level-1d operations perform various level-1 BLAS-like operations on matrix diagonals (hence the _d_).
These operations are similar to their level-1m counterparts, except they only read and update matrix diagonals and therefore do not take any `uplo` arguments. Please see the descriptions for the corresponding level-1m operation for a description of the arguments.
Most of these operations are similar to their level-1m counterparts, except they only read and update matrix diagonals and therefore do not take any `uplo` arguments. Please see the description of the corresponding level-1m operation for a description of the arguments.
---
@@ -592,6 +592,24 @@ void bli_?setd
#### setid
```c
void bli_?setid
(
doff_t diagoffa,
dim_t m,
dim_t n,
ctype_r* alpha,
ctype* a, inc_t rsa, inc_t csa
);
```
Set the imaginary components of every element along the diagonal of `a`, as
specified by `diagoffa`, to a scalar `alpha`.
Note that the datatype of `alpha` must be the real projection of the datatype
of `a`.
---
#### shiftd
```c
void bli_?shiftd
(
doff_t diagoffa,
dim_t m,
@@ -600,7 +618,8 @@ void bli_?setid
ctype* a, inc_t rsa, inc_t csa
);
```
Set the imaginary components of a matrix diagonal to a scalar `alpha`.
Add a constant value `alpha` to every element along the diagonal of `a`, as
specified by `diagoffa`.
---
@@ -620,6 +639,23 @@ void bli_?subd
---
#### xpbyd
```c
void bli_?xpbyd
(
doff_t diagoffa,
diag_t diaga,
trans_t transa,
dim_t m,
dim_t n,
ctype* a, inc_t rsa, inc_t csa,
ctype* beta,
ctype* b, inc_t rsb, inc_t csb
);
```
---
## Level-1m operations

View File

@@ -22,10 +22,10 @@ The BLIS build system was designed for use with GNU/Linux (or some other sane UN
* Python (2.7 or later)
* GNU `bash` (3.2 or later)
* GNU `make`
* a working C compiler
* GNU `make` (3.81 or later)
* a working C99 compiler
BLIS also requires a POSIX threads library at link-time (`-lpthread` or `libpthread.so`). This requirement holds even when configuring BLIS with multithreading disabled (the default) or with multithreading via OpenMP (`--enable-multithreading=openmp`).
BLIS also requires a POSIX threads library at link-time (`-lpthread` or `libpthread.so`). This requirement holds even when configuring BLIS with multithreading disabled (the default) or with multithreading via OpenMP (`--enable-threading=openmp`). (Note: BLIS implements basic pthreads functionality automatically for Windows builds via [AppVeyor](https://ci.appveyor.com/project/shpc/blis/).)
Finally, we also require various other shell utilities that are so ubiquitous that they are not worth mentioning (such as `mv`, `mkdir`, `find`, and so forth). If you are missing these utilities, then you have much bigger problems than not being able to build BLIS.

View File

@@ -41,7 +41,7 @@ int main( int argc, char** argv )
dim_t m, n;
inc_t rs, cs;
obj_t a, x, y, b, d;
obj_t a, x, y, b;
obj_t* alpha;
obj_t* beta;
@@ -297,9 +297,7 @@ int main( int argc, char** argv )
// Load the diagonal. By setting the diagonal to something of greater
// absolute value than the off-diagonal elements, we increase the odds
// that the matrix is not singular (singular matrices have no inverse).
bli_obj_create( dt, m, m, 0, 0, &d );
bli_setd( &BLIS_TWO, &d );
bli_addd( &d, &a );
bli_shiftd( &BLIS_TWO, &a );
bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" );
bli_printm( "b: initial value", &b, "%4.1f", "" );
@@ -320,7 +318,6 @@ int main( int argc, char** argv )
// Free the objects.
bli_obj_free( &a );
bli_obj_free( &b );
bli_obj_free( &d );
return 0;

View File

@@ -42,7 +42,7 @@ int main( int argc, char** argv )
inc_t rs, cs;
side_t side;
obj_t a, b, c, d;
obj_t a, b, c;
obj_t* alpha;
obj_t* beta;
@@ -299,9 +299,7 @@ int main( int argc, char** argv )
// Load the diagonal. By setting the diagonal to something of greater
// absolute value than the off-diagonal elements, we increase the odds
// that the matrix is not singular (singular matrices have no inverse).
bli_obj_create( dt, m, m, 0, 0, &d );
bli_setd( &BLIS_TWO, &d );
bli_addd( &d, &a );
bli_shiftd( &BLIS_TWO, &a );
bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" );
bli_printm( "b: initial value", &b, "%4.1f", "" );
@@ -323,7 +321,6 @@ int main( int argc, char** argv )
bli_obj_free( &a );
bli_obj_free( &b );
bli_obj_free( &c );
bli_obj_free( &d );
return 0;

View File

@@ -41,7 +41,6 @@ int main( int argc, char** argv )
double* x;
double* y;
double* b;
double* d;
double alpha, beta;
dim_t m, n;
inc_t rs, cs;
@@ -286,10 +285,7 @@ int main( int argc, char** argv )
// Load the diagonal. By setting the diagonal to something of greater
// absolute value than the off-diagonal elements, we increase the odds
// that the matrix is not singular (singular matrices have no inverse).
d = malloc( m * m * sizeof( double ) );
bli_dsetd( BLIS_NO_CONJUGATE, 0, m, m, &two, d, 1, m );
bli_daddd( 0, BLIS_NONUNIT_DIAG, BLIS_NO_TRANSPOSE,
m, m, d, 1, m, a, rs, cs );
bli_dshiftd( 0, m, m, &two, a, rs, cs );
bli_dprintm( "a: randomized (zeros in upper triangle)", m, m, a, rs, cs, "%4.1f", "" );
bli_dprintm( "b: intial value", 1, m, b, m, 1, "%4.1f", "" );

View File

@@ -45,7 +45,6 @@ int main( int argc, char** argv )
double* a;
double* b;
double* c;
double* d;
double alpha, beta;
// Initialize some basic constants.
@@ -311,10 +310,7 @@ int main( int argc, char** argv )
// Load the diagonal. By setting the diagonal to something of greater
// absolute value than the off-diagonal elements, we increase the odds
// that the matrix is not singular (singular matrices have no inverse).
d = malloc( m * m * sizeof( double ) );
bli_dsetd( BLIS_NO_CONJUGATE, 0, m, m, &two, d, 1, m );
bli_daddd( 0, BLIS_NONUNIT_DIAG, BLIS_NO_TRANSPOSE,
m, m, d, 1, m, a, rsa, csa );
bli_dshiftd( 0, m, m, &two, a, rsa, csa );
bli_dprintm( "a: randomized (zeros in upper triangle)", m, m, a, rsa, csa, "%4.1f", "" );
bli_dprintm( "b: initial value", m, n, b, rsb, csb, "%4.1f", "" );
@@ -339,7 +335,6 @@ int main( int argc, char** argv )
free( a );
free( b );
free( c );
free( d );
return 0;

View File

@@ -101,6 +101,7 @@ void PASTEMAC(opname,_check) \
GENFRONT( scald )
GENFRONT( setd )
GENFRONT( setid )
GENFRONT( shiftd )
#undef GENFRONT

View File

@@ -88,6 +88,7 @@ void PASTEMAC(opname,_check) \
GENTPROT( scald )
GENTPROT( setd )
GENTPROT( setid )
GENTPROT( shiftd )
#undef GENTPROT

View File

@@ -59,5 +59,6 @@ GENFRONT( invertd )
GENFRONT( scald )
GENFRONT( setd )
GENFRONT( setid )
GENFRONT( shiftd )
GENFRONT( xpbyd )

View File

@@ -51,5 +51,6 @@ GENPROT( invertd )
GENPROT( scald )
GENPROT( setd )
GENPROT( setid )
GENPROT( shiftd )
GENPROT( xpbyd )

View File

@@ -131,6 +131,23 @@ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \
INSERT_GENTDEFR( setid )
// shiftd
// Declares a function-pointer type for the typed shiftd implementation.
// The type name is pasted together from ch, opname, EX_SUF, and tsuf. The
// pointed-to function returns void and takes a diagonal offset, the matrix
// dimensions m and n, a pointer to the scalar alpha, and the matrix x with
// its row and column strides, followed by whatever BLIS_TAPI_EX_PARAMS
// expands to.
// NOTE(review): INSERT_GENTDEF presumably instantiates GENTDEF once per
// supported datatype -- confirm against its definition.
#undef GENTDEF
#define GENTDEF( ctype, ch, opname, tsuf ) \
\
typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \
( \
doff_t diagoffx, \
dim_t m, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t rs_x, inc_t cs_x \
BLIS_TAPI_EX_PARAMS \
);
INSERT_GENTDEF( shiftd )
// xpbyd
#undef GENTDEF

View File

@@ -239,7 +239,7 @@ void PASTEMAC(opname,EX_SUF) \
/* Create local copy-casts of scalars (and apply internal conjugation
as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \
alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\
/* Query a type-specific function pointer, except one that uses
@@ -312,6 +312,62 @@ void PASTEMAC(opname,EX_SUF) \
GENFRONT( setid )
// Object-API front-end generator for operations that take a scalar object
// alpha and a matrix object x (instantiated below for shiftd). The generated
// function reads the datatype, diagonal offset, dimensions, buffer, and
// strides out of x, makes a local copy of alpha cast to the datatype of x,
// looks up the function pointer for that datatype, and calls it.
// NOTE(review): cntx and rntm are not declared in this macro; presumably
// they come from BLIS_OAPI_EX_PARAMS / BLIS_OAPI_EX_DECLS -- confirm.
#undef GENFRONT
#define GENFRONT( opname ) \
\
void PASTEMAC(opname,EX_SUF) \
( \
obj_t* alpha, \
obj_t* x \
BLIS_OAPI_EX_PARAMS \
) \
{ \
bli_init_once(); \
\
BLIS_OAPI_EX_DECLS \
\
/* The datatype of x selects the implementation that is called below. */ \
num_t dt = bli_obj_dt( x ); \
\
/* Unpack the fields of x that are passed to the typed function. */ \
doff_t diagoffx = bli_obj_diag_offset( x ); \
dim_t m = bli_obj_length( x ); \
dim_t n = bli_obj_width( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( x ); \
\
void* buf_alpha; \
\
obj_t alpha_local; \
\
/* Validate the operands only when error checking is enabled. */ \
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( alpha, x ); \
\
/* Create local copy-casts of scalars (and apply internal conjugation
as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\
/* Query a type-specific function pointer, except one that uses
void* instead of typed pointers. */ \
PASTECH2(opname,BLIS_TAPI_EX_SUF,_vft) f = \
PASTEMAC2(opname,BLIS_TAPI_EX_SUF,_qfp)( dt ); \
\
/* Invoke the queried function on the unpacked operands. */ \
f \
( \
diagoffx, \
m, \
n, \
buf_alpha, \
buf_x, rs_x, cs_x, \
cntx, \
rntm \
); \
}
GENFRONT( shiftd )
#undef GENFRONT
#define GENFRONT( opname ) \
\

View File

@@ -92,6 +92,7 @@ void PASTEMAC(opname,EX_SUF) \
GENTPROT( scald )
GENTPROT( setd )
GENTPROT( setid )
GENTPROT( shiftd )
#undef GENTPROT

View File

@@ -387,6 +387,64 @@ void PASTEMAC2(ch,opname,EX_SUF) \
INSERT_GENTFUNCR_BASIC2( setid, setv, BLIS_SETV_KER )
/* Typed implementation generator, instantiated below for shiftd with the
   addv kernel. The generated function returns immediately when
   bli_zero_dim2( m, n ) or bli_is_outside_diag() reports true; otherwise it
   computes the offset, length and increment of the diagonal selected by
   diagoffx and invokes the level-1v kernel identified by kerid on it. */
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, kername, kerid ) \
\
void PASTEMAC2(ch,opname,EX_SUF) \
( \
doff_t diagoffx, \
dim_t m, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t rs_x, inc_t cs_x \
BLIS_TAPI_EX_PARAMS \
) \
{ \
bli_init_once(); \
\
BLIS_TAPI_EX_DECLS \
\
const num_t dt = PASTEMAC(ch,type); \
\
ctype* x1; \
dim_t n_elem; \
dim_t offx; \
inc_t incx; \
\
if ( bli_zero_dim2( m, n ) ) return; \
\
if ( bli_is_outside_diag( diagoffx, BLIS_NO_TRANSPOSE, m, n ) ) return; \
\
/* Determine the distance to the diagonals, the number of diagonal
elements, and the diagonal increments. */ \
bli_set_dims_incs_1d \
( \
diagoffx, \
m, n, rs_x, cs_x, \
&offx, &n_elem, &incx \
); \
\
x1 = x + offx; \
\
/* Obtain a valid context from the gks if necessary. */ \
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
\
/* Query the context for the operation's kernel address. */ \
PASTECH2(ch,kername,_ker_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx ); \
\
/* Invoke the kernel with the appropriate parameters. alpha is passed as
   the kernel's first vector operand with an increment of 0, and the
   diagonal of x (x1, incx) as its second operand. */ \
f( \
BLIS_NO_CONJUGATE, \
n_elem, \
alpha, 0, \
x1, incx, \
cntx \
); \
}
INSERT_GENTFUNC_BASIC2( shiftd, addv, BLIS_ADDV_KER )
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, kername, kerid ) \
\

View File

@@ -126,6 +126,22 @@ void PASTEMAC2(ch,opname,EX_SUF) \
INSERT_GENTPROTR_BASIC0( setid )
/* Prototype generator for the typed shiftd functions: diagonal offset,
   dimensions m and n, a pointer to the scalar alpha, and the matrix x with
   its row and column strides, followed by whatever BLIS_TAPI_EX_PARAMS
   expands to. */
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\
void PASTEMAC2(ch,opname,EX_SUF) \
( \
doff_t diagoffx, \
dim_t m, \
dim_t n, \
ctype* alpha, \
ctype* x, inc_t rs_x, inc_t cs_x \
BLIS_TAPI_EX_PARAMS \
);
INSERT_GENTPROT_BASIC0( shiftd )
#undef GENTPROT
#define GENTPROT( ctype, ch, opname ) \
\

View File

@@ -36,10 +36,7 @@
#ifndef BLIS_CONFIGURETIME_CPUID
#include "blis.h"
#else
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <pthread.h>
#include "bli_system.h"
#include "bli_type_defs.h"
#include "bli_arch.h"
#include "bli_cpuid.h"
@@ -67,7 +64,9 @@ static pthread_once_t once_id = PTHREAD_ONCE_INIT;
// Invoke bli_arch_set_id() through pthread_once() on once_id. When
// BLIS_CONFIGURETIME_CPUID is defined, the call is compiled out and this
// function does nothing.
void bli_arch_set_id_once( void )
{
#ifndef BLIS_CONFIGURETIME_CPUID
pthread_once( &once_id, bli_arch_set_id );
#endif
}
// -----------------------------------------------------------------------------

View File

@@ -35,10 +35,7 @@
#if 0
// Used only during standalone testing of ARM support.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "bli_system.h"
#include "bli_type_defs.h"
#include "bli_cpuid.h"
#undef __x86_64__
@@ -51,10 +48,7 @@
#ifndef BLIS_CONFIGURETIME_CPUID
#include "blis.h"
#else
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <pthread.h>
#include "bli_system.h"
#include "bli_type_defs.h"
#include "bli_cpuid.h"
#endif

View File

@@ -0,0 +1,198 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2018, Southern Methodist University
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
#include <errno.h>
#include <stdlib.h>
#if defined(_MSC_VER) && !defined(BLIS_ENABLE_PTHREADS)
// Initialize a mutex by initializing the underlying SRW lock. Attribute
// objects are not supported: a non-NULL attr yields EINVAL and the mutex is
// left untouched. Returns 0 otherwise.
int pthread_mutex_init( pthread_mutex_t* mutex,
                        const pthread_mutexattr_t* attr )
{
	if ( attr != NULL )
	{
		return EINVAL;
	}

	InitializeSRWLock( mutex );

	return 0;
}
// Destroy a mutex. This implementation performs no work on the mutex and
// always reports success (0).
int pthread_mutex_destroy( pthread_mutex_t* mutex )
{
	( void )mutex;

	return 0;
}
// Acquire the mutex by taking the underlying SRW lock in exclusive mode.
// Always returns 0.
int pthread_mutex_lock( pthread_mutex_t* mutex )
{
	const int status = 0;

	AcquireSRWLockExclusive( mutex );

	return status;
}
// Attempt to acquire the mutex via TryAcquireSRWLockExclusive(). Returns 0
// if the lock was acquired and EBUSY if it was not.
int pthread_mutex_trylock( pthread_mutex_t* mutex )
{
	if ( TryAcquireSRWLockExclusive( mutex ) )
	{
		return 0;
	}

	return EBUSY;
}
// Release the mutex by releasing the exclusively-held SRW lock. Always
// returns 0.
int pthread_mutex_unlock( pthread_mutex_t* mutex )
{
	const int status = 0;

	ReleaseSRWLockExclusive( mutex );

	return status;
}
// Callback handed to InitOnceExecuteOnce() by pthread_once(). The user's
// argument-less init function arrives through the generic 'param' pointer;
// it is converted back to a function pointer and invoked. 'once' and
// 'context' are unused. Always returns TRUE.
static BOOL bli_init_once_wrapper( pthread_once_t* once,
                                   void*           param,
                                   void**          context)
{
	void (*init_fn)( void ) = ( void (*)( void ) )param;

	( void )once;
	( void )context;

	init_fn();

	return TRUE;
}
// Run 'init' through InitOnceExecuteOnce() on the given once-control object.
// The init function is passed as the callback parameter of
// bli_init_once_wrapper(); no context pointer is requested.
void pthread_once( pthread_once_t* once, void (*init)(void) )
{
	void** no_context = NULL;

	InitOnceExecuteOnce( once, bli_init_once_wrapper, init, no_context );
}
// Initialize a condition variable. Attribute objects are not supported: a
// non-NULL attr yields EINVAL and the condition variable is left untouched.
// Returns 0 otherwise.
int pthread_cond_init( pthread_cond_t* cond,
                       const pthread_condattr_t* attr )
{
	if ( attr != NULL )
	{
		return EINVAL;
	}

	InitializeConditionVariable( cond );

	return 0;
}
// Destroy a condition variable. This implementation performs no work on the
// object and always reports success (0).
int pthread_cond_destroy( pthread_cond_t* cond )
{
	const int status = 0;

	( void )cond;

	return status;
}
// Wait on 'cond' with no timeout (INFINITE) via SleepConditionVariableSRW(),
// passing 'mutex' as the associated SRW lock. Returns 0 when the sleep call
// succeeds and EAGAIN when it reports failure.
int pthread_cond_wait( pthread_cond_t* cond,
                       pthread_mutex_t* mutex )
{
	if ( SleepConditionVariableSRW( cond, mutex, INFINITE, 0 ) )
	{
		return 0;
	}

	return EAGAIN;
}
// Wake every thread waiting on 'cond' via WakeAllConditionVariable(). Always
// returns 0.
int pthread_cond_broadcast( pthread_cond_t* cond )
{
	const int status = 0;

	WakeAllConditionVariable( cond );

	return status;
}
typedef struct
{
void* (*start_routine)( void* );
void* param;
void** retval;
} bli_thread_param;
static DWORD bli_thread_func( void* param_ )
{
bli_thread_param* param = param_;
*param->retval = param->start_routine( param->param );
return 0;
}
int pthread_create( pthread_t* thread,
const pthread_attr_t* attr,
void* (*start_routine)(void*),
void* arg )
{
if ( attr ) return EINVAL;
bli_thread_param param = { start_routine, arg, &thread->retval };
thread->handle = CreateThread( NULL, 0, bli_thread_func, &param, 0, NULL );
if ( !thread->handle ) return EAGAIN;
return 0;
}
int pthread_join( pthread_t thread,
void** retval )
{
if ( !WaitForSingleObject( thread.handle, INFINITE ) ) return EAGAIN;
if ( retval ) *retval = thread.retval;
return 0;
}
#endif
#if defined(__APPLE__) || defined(_MSC_VER)
// Initialize a barrier that trips after 'count' arrivals.
//
// Returns EINVAL if attr is non-NULL (attributes are not supported) or if
// count is zero. Otherwise the barrier's mutex and condition variable are
// initialized; if either initialization fails its error code is returned
// (and the already-initialized mutex is destroyed). Returns 0 on success.
int pthread_barrier_init( pthread_barrier_t* barrier,
                          const pthread_barrierattr_t* attr,
                          unsigned int count )
{
	int status;

	if ( attr != NULL ) return EINVAL;
	if ( count == 0 )   return EINVAL;

	status = pthread_mutex_init( &barrier->mutex, 0 );
	if ( status != 0 )
	{
		return status;
	}

	status = pthread_cond_init( &barrier->cond, 0 );
	if ( status != 0 )
	{
		// Roll back the mutex so that a failed init leaves nothing behind.
		pthread_mutex_destroy( &barrier->mutex );
		return status;
	}

	barrier->tripCount = count;
	barrier->count     = 0;

	return 0;
}
// Destroy a barrier by destroying its condition variable and then its mutex.
// The results of the two destroy calls are not inspected; always returns 0.
int pthread_barrier_destroy( pthread_barrier_t *barrier )
{
	pthread_cond_t*  cond  = &barrier->cond;
	pthread_mutex_t* mutex = &barrier->mutex;

	pthread_cond_destroy( cond );
	pthread_mutex_destroy( mutex );

	return 0;
}
// Wait at the barrier. Each caller increments the arrival count under the
// barrier's mutex. The caller whose arrival brings the count up to tripCount
// resets the count to zero, wakes all waiters and returns 1; every other
// caller waits on the condition variable and returns 0 once it wakes.
// NOTE(review): pthread_cond_wait() is called once rather than in a loop
// that re-checks a predicate, so a spurious wakeup would let a waiter leave
// the barrier early -- confirm whether a generation counter is needed.
// NOTE(review): the "last arriver" return value here is the literal 1, not
// PTHREAD_BARRIER_SERIAL_THREAD -- confirm that no caller compares against
// that macro.
int pthread_barrier_wait( pthread_barrier_t *barrier )
{
pthread_mutex_lock( &barrier->mutex );
++(barrier->count);
if ( barrier->count >= barrier->tripCount )
{
// Last arriver: reset the barrier for reuse and release everyone.
barrier->count = 0;
pthread_cond_broadcast( &barrier->cond );
pthread_mutex_unlock( &barrier->mutex );
return 1;
}
else
{
// Not the last arriver: sleep (the wait call is given the barrier's
// mutex) until the broadcast above.
pthread_cond_wait( &barrier->cond, &(barrier->mutex) );
pthread_mutex_unlock( &barrier->mutex );
return 0;
}
}
#endif

View File

@@ -0,0 +1,112 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2018, Southern Methodist University
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_PTHREAD_WRAP_H
#define BLIS_PTHREAD_WRAP_H
// When compiling with MSVC, declare the subset of the pthreads API that is
// declared below in terms of Win32 types. Otherwise, use the system's
// <pthread.h>.
#if defined(_MSC_VER)
// -- mutexes: an SRWLOCK stands in for pthread_mutex_t --
typedef SRWLOCK pthread_mutex_t;
// Attribute types are typedef'd to void; only pointers to them appear in the
// prototypes below.
typedef void pthread_mutexattr_t;
#define PTHREAD_MUTEX_INITIALIZER SRWLOCK_INIT
int pthread_mutex_init( pthread_mutex_t* mutex, const pthread_mutexattr_t *attr );
int pthread_mutex_destroy( pthread_mutex_t* mutex );
int pthread_mutex_lock( pthread_mutex_t* mutex );
int pthread_mutex_trylock( pthread_mutex_t* mutex );
int pthread_mutex_unlock( pthread_mutex_t* mutex );
// -- one-time initialization: an INIT_ONCE stands in for pthread_once_t --
typedef INIT_ONCE pthread_once_t;
#define PTHREAD_ONCE_INIT INIT_ONCE_STATIC_INIT
void pthread_once( pthread_once_t* once, void (*init)( void ) );
// -- condition variables: a CONDITION_VARIABLE stands in for pthread_cond_t --
typedef CONDITION_VARIABLE pthread_cond_t;
typedef void pthread_condattr_t;
#define PTHREAD_COND_INITIALIZER CONDITION_VARIABLE_INIT
int pthread_cond_init( pthread_cond_t* cond, const pthread_condattr_t* attr );
int pthread_cond_destroy( pthread_cond_t* cond );
int pthread_cond_wait( pthread_cond_t* cond, pthread_mutex_t* mutex );
int pthread_cond_broadcast( pthread_cond_t* cond );
// -- threads: a Win32 thread handle plus storage used by the implementation
// to deliver the start routine's return value to pthread_join() --
typedef struct
{
HANDLE handle;
void* retval;
} pthread_t;
typedef void pthread_attr_t;
int pthread_create( pthread_t *thread, const pthread_attr_t *attr, void* (*start_routine)( void* ), void *arg );
int pthread_join( pthread_t thread, void **retval );
#else
// All other compilers: use the system-provided pthreads declarations.
#include <pthread.h>
#endif
// For Apple and MSVC builds, declare a barrier type (and the functions that
// operate on it) built from a mutex, a condition variable and two counters.
#if defined(__APPLE__) || defined(_MSC_VER)
typedef void pthread_barrierattr_t;
typedef struct
{
pthread_mutex_t mutex;
pthread_cond_t cond;
// Number of threads that have arrived at the barrier so far.
int count;
// Number of arrivals at which the barrier releases its waiters.
int tripCount;
} pthread_barrier_t;
int pthread_barrier_init( pthread_barrier_t *barrier, const pthread_barrierattr_t *attr, unsigned int count );
int pthread_barrier_destroy( pthread_barrier_t *barrier );
int pthread_barrier_wait( pthread_barrier_t *barrier );
#endif // defined(__APPLE__) || defined(_MSC_VER)
#endif

View File

@@ -103,7 +103,8 @@
// POSIX threads are unconditionally required, regardless of whether
// multithreading is enabled via pthreads or OpenMP (or disabled).
#include <pthread.h>
// If pthreads is not available (Windows), then fake it.
#include "bli_pthread_wrap.h"
#endif

View File

@@ -915,7 +915,7 @@ typedef struct
// -- Memory broker object type --
#include <pthread.h>
#include "bli_pthread_wrap.h"
#include "bli_malloc.h"
typedef struct membrk_s

View File

@@ -68,6 +68,7 @@ extern "C" {
// -- Threading definitions --
#include "bli_thread.h"
#include "bli_pthread.h"
// -- Constant definitions --

146
frame/thread/bli_pthread.c Normal file
View File

@@ -0,0 +1,146 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2018, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
/*
This wrapper API to pthreads is provided so that BLIS applications (such
as the BLIS testsuite) can call pthreads functions without worrying about
whether the implementation is external, provided via system library, or
provided by BLIS. In theory, it shouldn't matter, right? pthreads is
pthreads as long as the correct API is provided. However, in the
situations where BLIS *does* decide that it needs to provide a pthreads
implementation, such as on Windows, providing and exporting the API below
means that BLIS does not need to export pthreads symbols. (To be clear:
exporting pthreads symbols would work, but it seems ill-advised for BLIS
to ever export functionality beyond that of which it advertises itself as
providing: namely, BLAS-like and BLAS APIs.)
*/
// -- pthread_create(), pthread_join() -----------------------------------------
// Forward to pthread_create() with the arguments unchanged and hand back its
// status code.
int bli_pthread_create
     (
       bli_pthread_t*            thread,
       const bli_pthread_attr_t* attr,
       void*                   (*start_routine)(void*),
       void*                     arg
     )
{
	const int status = pthread_create( thread, attr, start_routine, arg );

	return status;
}
// Forward to pthread_join() with the arguments unchanged and hand back its
// status code.
int bli_pthread_join
     (
       bli_pthread_t thread,
       void**        retval
     )
{
	const int status = pthread_join( thread, retval );

	return status;
}
// -- pthread_mutex_*() --------------------------------------------------------
// Forward to pthread_mutex_init() with the arguments unchanged and hand back
// its status code.
int bli_pthread_mutex_init
     (
       bli_pthread_mutex_t*           mutex,
       const bli_pthread_mutexattr_t* attr
     )
{
	const int status = pthread_mutex_init( mutex, attr );

	return status;
}
// Forward to pthread_mutex_destroy() and hand back its status code.
int bli_pthread_mutex_destroy
     (
       bli_pthread_mutex_t* mutex
     )
{
	const int status = pthread_mutex_destroy( mutex );

	return status;
}
// Forward to pthread_mutex_lock() and hand back its status code.
int bli_pthread_mutex_lock
     (
       bli_pthread_mutex_t* mutex
     )
{
	const int status = pthread_mutex_lock( mutex );

	return status;
}
// Release a mutex by forwarding to pthread_mutex_unlock() and returning its
// status code. (This wrapper previously forwarded to pthread_mutex_lock(),
// which attempted to re-acquire the mutex instead of releasing it.)
int bli_pthread_mutex_unlock
     (
       bli_pthread_mutex_t* mutex
     )
{
	return pthread_mutex_unlock( mutex );
}
// -- pthread_barrier_*() ------------------------------------------------------
// Forward to pthread_barrier_init() with the arguments unchanged and hand
// back its status code.
int bli_pthread_barrier_init
     (
       bli_pthread_barrier_t*           barrier,
       const bli_pthread_barrierattr_t* attr,
       unsigned int                     count
     )
{
	const int status = pthread_barrier_init( barrier, attr, count );

	return status;
}
// Forward to pthread_barrier_destroy() and hand back its status code.
int bli_pthread_barrier_destroy
     (
       bli_pthread_barrier_t* barrier
     )
{
	const int status = pthread_barrier_destroy( barrier );

	return status;
}
// Forward to pthread_barrier_wait() and hand back its return value.
int bli_pthread_barrier_wait
     (
       bli_pthread_barrier_t* barrier
     )
{
	const int result = pthread_barrier_wait( barrier );

	return result;
}
// -- pthread_once() -----------------------------------------------------------
// Forward to pthread_once() with the once-control object and init function
// unchanged. Nothing is returned to the caller.
void bli_pthread_once
     (
       bli_pthread_once_t* once,
       void              (*init)(void)
     )
{
	pthread_once( once, init );

	return;
}

109
frame/thread/bli_pthread.h Normal file
View File

@@ -0,0 +1,109 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2018, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Include guard: without it, a second inclusion of this header would repeat
// every typedef below.
#ifndef BLIS_PTHREAD_H
#define BLIS_PTHREAD_H

// -- pthread types ------------------------------------------------------------

// Each bli_pthread_* type is an alias of the pthread type of the same name.
typedef pthread_t              bli_pthread_t;
typedef pthread_attr_t         bli_pthread_attr_t;
typedef pthread_mutex_t        bli_pthread_mutex_t;
typedef pthread_mutexattr_t    bli_pthread_mutexattr_t;
typedef pthread_barrier_t      bli_pthread_barrier_t;
typedef pthread_barrierattr_t  bli_pthread_barrierattr_t;
typedef pthread_once_t         bli_pthread_once_t;

// Each function below takes the same arguments, in the same order, as the
// pthread function named in its section heading.

// -- pthread_create(), pthread_join() -----------------------------------------

int bli_pthread_create
     (
       bli_pthread_t*            thread,
       const bli_pthread_attr_t* attr,
       void*                   (*start_routine)(void*),
       void*                     arg
     );

int bli_pthread_join
     (
       bli_pthread_t thread,
       void**        retval
     );

// -- pthread_mutex_*() --------------------------------------------------------

int bli_pthread_mutex_init
     (
       bli_pthread_mutex_t*           mutex,
       const bli_pthread_mutexattr_t* attr
     );

int bli_pthread_mutex_destroy
     (
       bli_pthread_mutex_t* mutex
     );

int bli_pthread_mutex_lock
     (
       bli_pthread_mutex_t* mutex
     );

int bli_pthread_mutex_unlock
     (
       bli_pthread_mutex_t* mutex
     );

// -- pthread_barrier_*() ------------------------------------------------------

int bli_pthread_barrier_init
     (
       bli_pthread_barrier_t*           barrier,
       const bli_pthread_barrierattr_t* attr,
       unsigned int                     count
     );

int bli_pthread_barrier_destroy
     (
       bli_pthread_barrier_t* barrier
     );

int bli_pthread_barrier_wait
     (
       bli_pthread_barrier_t* barrier
     );

// -- pthread_once_*() ---------------------------------------------------------

void bli_pthread_once
     (
       bli_pthread_once_t* once,
       void              (*init)(void)
     );

#endif // BLIS_PTHREAD_H

View File

@@ -38,8 +38,6 @@
// Define thrcomm_t for situations when POSIX multithreading is enabled.
#ifdef BLIS_ENABLE_PTHREADS
#include <pthread.h>
#ifdef BLIS_USE_PTHREAD_BARRIER
struct thrcomm_s
{

View File

@@ -39,6 +39,27 @@
int main( int argc, char** argv )
{
obj_t kappa, d;
num_t dtd = BLIS_DCOMPLEX;
bli_obj_create( dtd, 1, 1, 0, 0, &kappa );
bli_setsc( (2.0/1.0), -0.5, &kappa );
bli_obj_create( dtd, 5, 3, 0, 0, &d );
bli_randm( &d );
bli_printm( "d", &d, "%7.3f", "" );
bli_shiftd( &kappa, &d );
bli_printm( "d after", &d, "%7.3f", "" );
bli_obj_free( &kappa );
bli_obj_free( &d );
return 0;
obj_t a, b, c;
obj_t c_save;
obj_t alpha, beta;

View File

@@ -41,7 +41,7 @@
int main( int argc, char** argv )
{
obj_t a, c, d;
obj_t a, c;
obj_t c_save;
obj_t alpha;
dim_t m, n;
@@ -163,11 +163,6 @@ int main( int argc, char** argv )
//bli_obj_create( dt, m, n, n, 1, &c );
bli_obj_create( dt, m, n, 0, 0, &c_save );
if ( bli_does_trans( side ) )
bli_obj_create( dt, m, m, 0, 0, &d );
else
bli_obj_create( dt, n, n, 0, 0, &d );
bli_randm( &a );
bli_randm( &c );
@@ -179,8 +174,8 @@ int main( int argc, char** argv )
bli_randm( &a );
bli_mktrim( &a );
bli_setd( &BLIS_TWO, &d );
bli_addd( &d, &a );
// Load the diagonal of A to make it more likely to be invertible.
bli_shiftd( &BLIS_TWO, &a );
bli_setsc( (2.0/1.0), 0.0, &alpha );
@@ -328,7 +323,6 @@ int main( int argc, char** argv )
bli_obj_free( &a );
bli_obj_free( &c );
bli_obj_free( &c_save );
bli_obj_free( &d );
}
//bli_finalize();

View File

@@ -144,13 +144,14 @@ int main( int argc, char** argv )
bli_obj_set_conjtrans( transa, &a );
bli_obj_set_diag( diaga, &a );
// Randomize A, make it densely Hermitian, and zero the unstored
// triangle to ensure the implementation reads only from the stored
// region.
// Randomize A and zero the unstored triangle to ensure the
// implementation reads only from the stored region.
bli_randm( &a );
bli_mkherm( &a );
bli_mktrim( &a );
// Load the diagonal of A to make it more likely to be invertible.
bli_shiftd( &BLIS_TWO, &a );
bli_setsc( (2.0/1.0), 1.0, &alpha );

View File

@@ -114,6 +114,14 @@ int main( int argc, char** argv )
bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a );
bli_obj_set_diag( BLIS_NONUNIT_DIAG, &a );
// Randomize A and zero the unstored triangle to ensure the
// implementation reads only from the stored region.
bli_randm( &a );
bli_mktrim( &a );
// Load the diagonal of A to make it more likely to be invertible.
bli_shiftd( &BLIS_TWO, &a );
bli_setsc( (1.0/1.0), 0.0, &alpha );

View File

@@ -1,70 +0,0 @@
//#ifdef __APPLE__
//#if !defined(_POSIX_BARRIERS) || (_POSIX_BARRIERS < 0)
#if !defined(_POSIX_BARRIERS) || (_POSIX_BARRIERS != 200809L)
#ifndef PTHREAD_BARRIER_H_
#define PTHREAD_BARRIER_H_
#include <pthread.h>
#include <errno.h>
typedef int pthread_barrierattr_t;
typedef struct
{
pthread_mutex_t mutex;
pthread_cond_t cond;
int count;
int tripCount;
} pthread_barrier_t;
// Initialize a barrier that trips after 'count' arrivals. 'attr' is not
// consulted.
//
// Returns 0 on success. On failure returns -1 with errno set: EINVAL when
// count is zero, or the error number reported by pthread_mutex_init() /
// pthread_cond_init(). Those functions report failure by returning a
// non-zero (positive) error number rather than a negative value, so their
// results are compared against zero.
inline int pthread_barrier_init(pthread_barrier_t *barrier, const pthread_barrierattr_t *attr, unsigned int count)
{
    int rc;

    (void)attr;

    if (count == 0)
    {
        errno = EINVAL;
        return -1;
    }

    rc = pthread_mutex_init(&barrier->mutex, 0);
    if (rc != 0)
    {
        errno = rc;
        return -1;
    }

    rc = pthread_cond_init(&barrier->cond, 0);
    if (rc != 0)
    {
        // Roll back the mutex so that a failed init leaves nothing behind.
        pthread_mutex_destroy(&barrier->mutex);
        errno = rc;
        return -1;
    }

    barrier->tripCount = count;
    barrier->count = 0;

    return 0;
}
// Destroy a barrier by destroying its condition variable and then its mutex.
// The results of the two destroy calls are not inspected; always returns 0.
inline int pthread_barrier_destroy(pthread_barrier_t *barrier)
{
    pthread_cond_t *cond = &barrier->cond;
    pthread_mutex_t *mutex = &barrier->mutex;

    pthread_cond_destroy(cond);
    pthread_mutex_destroy(mutex);

    return 0;
}
// Wait at the barrier. Each caller increments the arrival count under the
// barrier's mutex. The caller whose arrival brings the count up to tripCount
// resets the count to zero, wakes all waiters and returns 1; every other
// caller waits on the condition variable and returns 0 once it wakes.
// NOTE(review): pthread_cond_wait() is called once rather than in a loop
// that re-checks a predicate, so a spurious wakeup would let a waiter leave
// the barrier early -- confirm.
inline int pthread_barrier_wait(pthread_barrier_t *barrier)
{
pthread_mutex_lock(&barrier->mutex);
++(barrier->count);
if(barrier->count >= barrier->tripCount)
{
// Last arriver: reset the barrier for reuse and release everyone.
barrier->count = 0;
pthread_cond_broadcast(&barrier->cond);
pthread_mutex_unlock(&barrier->mutex);
return 1;
}
else
{
pthread_cond_wait(&barrier->cond, &(barrier->mutex));
pthread_mutex_unlock(&barrier->mutex);
return 0;
}
}
#endif // PTHREAD_BARRIER_H_
#endif // _POSIX_BARRIERS

View File

@@ -126,20 +126,20 @@ void libblis_test_thread_decorator( test_params_t* params, test_ops_t* ops )
// Allocate an array of pthread objects and auxiliary data structs to pass
// to the thread entry functions.
pthread_t* pthread = bli_malloc_intl( sizeof( pthread_t ) * nt );
thread_data_t* tdata = bli_malloc_intl( sizeof( thread_data_t ) * nt );
bli_pthread_t* pthread = bli_malloc_intl( sizeof( bli_pthread_t ) * nt );
thread_data_t* tdata = bli_malloc_intl( sizeof( thread_data_t ) * nt );
// Allocate a mutex for the threads to share.
//pthread_mutex_t* mutex = bli_malloc_intl( sizeof( pthread_mutex_t ) );
//bli_pthread_mutex_t* mutex = bli_malloc_intl( sizeof( bli_pthread_mutex_t ) );
// Allocate a barrier for the threads to share.
pthread_barrier_t* barrier = bli_malloc_intl( sizeof( pthread_barrier_t ) );
bli_pthread_barrier_t* barrier = bli_malloc_intl( sizeof( bli_pthread_barrier_t ) );
// Initialize the mutex.
//pthread_mutex_init( mutex, NULL );
//bli_pthread_mutex_init( mutex, NULL );
// Initialize the barrier for nt threads.
pthread_barrier_init( barrier, NULL, nt );
bli_pthread_barrier_init( barrier, NULL, nt );
// NOTE: We must iterate backwards so that the chief thread (thread id 0)
// can spawn all other threads before proceeding with its own computation.
@@ -157,7 +157,7 @@ void libblis_test_thread_decorator( test_params_t* params, test_ops_t* ops )
// Spawn additional threads for ids greater than 1.
if ( id != 0 )
pthread_create( &pthread[id], NULL, libblis_test_thread_entry, &tdata[id] );
bli_pthread_create( &pthread[id], NULL, libblis_test_thread_entry, &tdata[id] );
else
libblis_test_thread_entry( ( void* )(&tdata[0]) );
}
@@ -165,14 +165,14 @@ void libblis_test_thread_decorator( test_params_t* params, test_ops_t* ops )
// Thread 0 waits for additional threads to finish.
for ( unsigned int id = 1; id < nt; id++ )
{
pthread_join( pthread[id], NULL );
bli_pthread_join( pthread[id], NULL );
}
// Destroy the mutex.
//pthread_mutex_destroy( mutex );
//bli_pthread_mutex_destroy( mutex );
// Destroy the barrier.
pthread_barrier_destroy( barrier );
bli_pthread_barrier_destroy( barrier );
// Free the pthread-related memory.
bli_free_intl( pthread );
@@ -2267,9 +2267,9 @@ void libblis_test_op_driver
}
}
}
// Wait for all other threads so that the output stays organized.
pthread_barrier_wait( tdata->barrier );
bli_pthread_barrier_wait( tdata->barrier );
// These statements should only be executed by one thread.
if ( tdata->id == 0 )
@@ -2694,21 +2694,10 @@ void libblis_test_ceil_pow2( obj_t* alpha )
void libblis_test_mobj_load_diag( test_params_t* params, obj_t* a )
{
num_t dt = bli_obj_dt( a );
dim_t m = bli_obj_length( a );
dim_t n = bli_obj_width( a );
obj_t d;
// We assume that all elements of a were intialized on interval [-1,1].
bli_obj_create( dt, m, n, 0, 0, &d );
// Initialize the diagonal of d to 2.0 and then add the diagonal of a.
bli_setd( &BLIS_TWO, &d );
bli_addd( &d, a );
bli_obj_free( &d );
// Load the diagonal by 2.0.
bli_shiftd( &BLIS_TWO, a );
}

View File

@@ -52,14 +52,6 @@
#include <unistd.h>
#endif
// For pthreads API.
#include <pthread.h>
//#ifdef __APPLE__
//#if !defined(_POSIX_BARRIERS) || (_POSIX_BARRIERS < 0)
#if !defined(_POSIX_BARRIERS) || (_POSIX_BARRIERS != 200809L)
#include "pthread_barrier.h"
#endif
//
// --- Constants and types -----------------------------------------------------
//
@@ -303,13 +295,13 @@ typedef struct
typedef struct thread_data
{
test_params_t* params;
test_ops_t* ops;
unsigned int nt;
unsigned int id;
unsigned int xc;
//pthread_mutex_t* mutex;
pthread_barrier_t* barrier;
test_params_t* params;
test_ops_t* ops;
unsigned int nt;
unsigned int id;
unsigned int xc;
//bli_pthread_mutex_t* mutex;
bli_pthread_barrier_t* barrier;
} thread_data_t;