mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Make BLIS_ARCH_TYPE=0 be an error, so that incorrect meaningful names will get an error rather than "skx" code path. BLIS_ARCH_TYPE=1 is now "generic", so that it should be constant as new code paths are added. Thus all other code path enum values have increased by 2. Also added new options to BLIS configure program to allow: 1. BLIS_ARCH_TYPE functionality to be disabled, e.g.: ./configure --disable-blis-arch-type amdzen 2. Renaming the environment variable tested from "BLIS_ARCH_TYPE" to a specified value, e.g.: ./configure --rename-blis-arch-type=MY_NAME_FOR_ARCH_TYPE amdzen On Windows, these can be enabled with e.g.: cmake ... -DDISABLE_BLIS_ARCH_TYPE=ON or cmake ... -DRENAME_BLIS_ARCH_TYPE=MY_NAME_FOR_ARCH_TYPE This implements changes 2 and 3 in the Jira ticket below. AMD-Internal: [CPUPL-2235] Change-Id: Ie42906bd909f9d83f00a90c5bef9c5bf3ef5adb4
3871 lines
128 KiB
Bash
Executable File
3871 lines
128 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2014, The University of Texas at Austin
# Copyright (C) 2020-2022, Advanced Micro Devices, Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name(s) of the copyright holder(s) nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#

#
# -- Helper functions ----------------------------------------------------------
#
|
|
|
|
print_usage()
{
	# Print the configure help text to stdout and terminate the script.
	#
	# Globals read:
	#   version          - version string; if empty it is loaded from the
	#                      file named by version_filepath.
	#   version_filepath - path of the 'version' file (only read when
	#                      version is empty).
	#   script_name      - name shown in the banner and usage line.
	#   prefix_def       - default install prefix shown in the --prefix help.
	# Globals written:
	#   version          - set from the version file when it was empty.
	# Exit status:
	#   Always exits the calling shell with status 1.

	# Use the version string in the 'version' file since we don't have
	# the patched version string yet.
	if [ -z "${version}" ]; then
		version=$(cat "${version_filepath}")
	fi

	# Echo usage info.
	echo " "
	echo " ${script_name} (BLIS ${version})"
	#echo " "
	#echo " BLIS ${version}"
	echo " "
	echo " Configure BLIS's build system for compilation using a specified"
	echo " configuration directory."
	echo " "
	echo " Usage:"
	echo " "
	echo " ${script_name} [options] [env. vars.] confname"
	echo " "
	echo " Arguments:"
	echo " "
	echo " confname The name of the sub-directory inside of the 'config'"
	echo " directory containing the desired BLIS configuration."
	echo " Note that confname MUST be specified; if it is not,"
	echo " configure will complain. To build a completely generic"
	echo " implementation, use the 'generic' configuration"
	echo " "
	echo " Options:"
	echo " "
	echo " -p PREFIX, --prefix=PREFIX"
	echo " "
	echo " The common installation prefix for all files. If given,"
	echo " this option effectively implies:"
	echo " --libdir=EXECPREFIX/lib"
	echo " --includedir=PREFIX/include"
	echo " --sharedir=PREFIX/share"
	echo " where EXECPREFIX defaults to PREFIX. If this option is"
	echo " not given, PREFIX defaults to '${prefix_def}'. If PREFIX"
	echo " refers to a directory that does not exist, it will be"
	echo " created."
	echo " "
	echo " --exec-prefix=EXECPREFIX"
	echo " "
	echo " The installation prefix for libraries. Specifically, if"
	echo " given, this option effectively implies:"
	echo " --libdir=EXECPREFIX/lib"
	echo " If not given, EXECPREFIX defaults to PREFIX, which may be"
	echo " modified by the --prefix option. If EXECPREFIX refers to"
	echo " a directory that does not exist, it will be created."
	echo " "
	echo " --libdir=LIBDIR"
	echo " "
	echo " The path to which make will install libraries. If not"
	echo " given, LIBDIR defaults to PREFIX/lib. If LIBDIR refers to"
	echo " a directory that does not exist, it will be created."
	echo " "
	echo " --includedir=INCDIR"
	echo " "
	echo " The path to which make will install development header"
	echo " files. If not given, INCDIR defaults to PREFIX/include."
	echo " If INCDIR refers to a directory that does not exist, it"
	echo " will be created."
	echo " "
	echo " --sharedir=SHAREDIR"
	echo " "
	echo " The path to which make will install makefile fragments containing"
	echo " make variables determined by configure (e.g. CC, CFLAGS,"
	echo " and LDFLAGS). These files allow certain BLIS makefiles,"
	echo " such as those in the examples or testsuite directories, to"
	echo " operate on an installed copy of BLIS rather than a local"
	echo " (and possibly uninstalled) copy. If not given, SHAREDIR"
	echo " defaults to PREFIX/share. If SHAREDIR refers to a"
	echo " directory that does not exist, it will be created."
	echo " "
	echo " --enable-verbose-make, --disable-verbose-make"
	echo " "
	echo " Enable (disabled by default) verbose compilation output"
	echo " during make."
	echo " "
	echo " --enable-arg-max-hack --disable-arg-max-hack"
	echo " "
	echo " Enable (disabled by default) build system logic that"
	echo " will allow archiving/linking the static/shared library"
	echo " even if the command plus command line arguments exceeds"
	echo " the operating system limit (ARG_MAX)."
	echo " "
	echo " -d DEBUG, --enable-debug[=DEBUG]"
	echo " "
	echo " Enable debugging symbols in the library. If argument"
	echo " DEBUG is given as 'opt', then optimization flags are"
	echo " kept in the framework, otherwise optimization is"
	echo " turned off."
	echo " "
	echo " --disable-static, --enable-static"
	echo " "
	echo " Disable (enabled by default) building BLIS as a static"
	echo " library. If the static library build is disabled, the"
	echo " shared library build must remain enabled."
	echo " "
	echo " --disable-shared, --enable-shared"
	echo " "
	echo " Disable (enabled by default) building BLIS as a shared"
	echo " library. If the shared library build is disabled, the"
	echo " static library build must remain enabled."
	echo " "
	echo " -e SYMBOLS, --export-shared[=SYMBOLS]"
	echo " "
	echo " Specify the subset of library symbols that are exported"
	echo " within a shared library. Valid values for SYMBOLS are:"
	echo " 'public' (the default) and 'all'. By default, only"
	echo " functions and variables that belong to public APIs are"
	echo " exported in shared libraries. However, the user may"
	echo " instead export all symbols in BLIS, even those that were"
	echo " intended for internal use only. Note that the public APIs"
	echo " encompass all functions that almost any user would ever"
	echo " want to call, including the BLAS/CBLAS compatibility APIs"
	echo " as well as the basic and expert interfaces to the typed"
	echo " and object APIs that are unique to BLIS. Also note that"
	echo " changing this option to 'all' will have no effect in some"
	echo " environments, such as when compiling with clang on"
	echo " Windows."
	echo " "
	echo " -t MODEL, --enable-threading[=MODEL], --disable-threading"
	echo " "
	echo " Enable threading in the library, using threading model"
	echo " MODEL={openmp,pthreads,no}. If MODEL=no or "
	echo " --disable-threading is specified, threading will be"
	echo " disabled. The default is 'no'."
	echo " "
	echo " --enable-system, --disable-system"
	echo " "
	echo " Enable conventional operating system support, such as"
	echo " pthreads for thread-safety. The default state is enabled."
	echo " However, in rare circumstances you may wish to configure"
	echo " BLIS for use with a minimal or nonexistent operating"
	echo " system (e.g. hardware simulators). In these situations,"
	echo " --disable-system may be used to jettison all compile-time"
	echo " and link-time dependencies outside of the standard C"
	echo " library. When disabled, this option also forces the use"
	echo " of --disable-threading."
	echo " "
	echo " --disable-pba-pools, --enable-pba-pools"
	echo " --disable-sba-pools, --enable-sba-pools"
	echo " "
	echo " Disable (enabled by default) use of internal memory pools"
	echo " within the packing block allocator (pba) and/or the small"
	echo " block allocator (sba). The former is used to allocate"
	echo " memory used to pack submatrices while the latter is used"
	echo " to allocate control/thread tree nodes and thread"
	echo " communicators. Both allocations take place in the context"
	echo " of level-3 operations. When the pba is disabled, the"
	echo " malloc()-like function specified by BLIS_MALLOC_POOL is"
	echo " called on-demand whenever a packing block is needed, and"
	echo " when the sba is disabled, the malloc()-like function"
	echo " specified by BLIS_MALLOC_INTL is called whenever a small"
	echo " block is needed, with the two allocators calling free()-"
	echo " like functions BLIS_FREE_POOL and BLIS_FREE_INTL,"
	echo " respectively when blocks are released. When enabled,"
	echo " either or both pools are populated via the same functions"
	echo " mentioned previously, and henceforth blocks are checked"
	echo " out and in. The library quickly reaches a state in which"
	echo " it no longer needs to call malloc() or free(), even"
	echo " across many separate level-3 operation invocations."
	echo " "
	echo " --enable-mem-tracing, --disable-mem-tracing"
	echo " "
	echo " Enable (disabled by default) output to stdout that traces"
	echo " the allocation and freeing of memory, including the names"
	echo " of the functions that triggered the allocation/freeing."
	echo " Enabling this option WILL NEGATIVELY IMPACT PERFORMANCE."
	echo " Please use only for informational/debugging purposes."
	echo " "
	echo " -i SIZE, --int-size=SIZE"
	echo " "
	echo " Set the size (in bits) of internal BLIS integers and"
	echo " integer types used in native BLIS interfaces. The"
	echo " default integer type size is architecture dependent."
	echo " (Hint: You can always find this value printed at the"
	echo " beginning of the testsuite output.)"
	echo " "
	echo " -b SIZE, --blas-int-size=SIZE"
	echo " "
	echo " Set the size (in bits) of integer types in external"
	echo " BLAS and CBLAS interfaces, if enabled. The default"
	echo " integer type size used in BLAS/CBLAS is 32 bits."
	echo " "
	echo " --disable-blas, --enable-blas"
	echo " "
	echo " Disable (enabled by default) building the BLAS"
	echo " compatibility layer."
	echo " "
	echo " --enable-cblas, --disable-cblas"
	echo " "
	echo " Enable (disabled by default) building the CBLAS"
	echo " compatibility layer. This automatically enables the"
	echo " BLAS compatibility layer as well."
	echo " "
	echo " --disable-mixed-dt, --enable-mixed-dt"
	echo " "
	echo " Disable (enabled by default) support for mixing the"
	echo " storage domain and/or storage precision of matrix"
	echo " operands for the gemm operation, as well as support"
	echo " for computing in a precision different from one or"
	echo " both of matrices A and B."
	echo " "
	echo " --disable-mixed-dt-extra-mem, --enable-mixed-dt-extra-mem"
	echo " "
	echo " Disable (enabled by default) support for additional"
	echo " mixed datatype optimizations that require temporarily"
	echo " allocating extra memory--specifically, a single m x n"
	echo " matrix (per application thread) whose storage datatype"
	echo " is equal to the computation datatype. This option may"
	echo " only be enabled when mixed domain/precision support is"
	echo " enabled."
	echo " "
	echo " --disable-sup-handling, --enable-sup-handling"
	echo " "
	echo " Disable (enabled by default) handling of small/skinny"
	echo " matrix problems via separate code branches. When disabled,"
	echo " these small/skinny level-3 operations will be performed by"
	echo " the conventional implementation, which is optimized for"
	echo " medium and large problems. Note that what qualifies as"
	echo " \"small\" depends on thresholds that may vary by sub-"
	echo " configuration."
	echo " "
	echo " -a NAME --enable-addon=NAME"
	echo " "
	echo " Enable the code provided by an addon. An addon consists"
	echo " of a separate directory of code that provides additional"
	echo " APIs, implementations, and/or operations that would"
	echo " otherwise not be present within a build of BLIS. This"
	echo " option may be used multiple times to specify the inclusion"
	echo " of multiple addons. By default, no addons are enabled."
	echo " "
	echo " -s NAME --enable-sandbox=NAME"
	echo " "
	echo " Enable a separate sandbox implementation of gemm. This"
	echo " option disables BLIS's conventional gemm implementation"
	echo " (which shares common infrastructure with other level-3"
	echo " operations) and instead compiles and uses the code in"
	echo " the NAME directory, which is expected to be a sub-"
	echo " directory of 'sandbox'. By default, no sandboxes are"
	echo " enabled."
	echo " "
	echo " --with-memkind, --without-memkind"
	echo " "
	echo " Forcibly enable or disable the use of libmemkind's"
	echo " hbw_malloc() and hbw_free() as substitutes for malloc()"
	echo " and free(), respectively, when allocating memory for"
	echo " BLIS's memory pools, which are used to manage buffers"
	echo " into which matrices are packed. The default behavior"
	echo " for this option is environment-dependent; if configure"
	echo " detects the presence of libmemkind, libmemkind is used"
	echo " by default, and otherwise it is not used by default."
	echo " "
	echo " -r METHOD, --thread-part-jrir=METHOD"
	echo " "
	echo " Request a method of assigning micropanels to threads in"
	echo " the JR and IR loops. Valid values for METHOD are 'slab'"
	echo " and 'rr'. Using 'slab' assigns (as much as possible)"
	echo " contiguous regions of micropanels to each thread while"
	echo " using 'rr' assigns micropanels to threads in a round-"
	echo " robin fashion. The chosen method also applies during"
	echo " the packing of A and B. The default method is 'slab'."
	echo " NOTE: Specifying this option constitutes a request,"
	echo " which may be ignored in select situations if the"
	echo " implementation has a good reason to do so."
	echo " "
	echo " --disable-trsm-preinversion, --enable-trsm-preinversion"
	echo " "
	echo " Disable (enabled by default) pre-inversion of triangular"
	echo " matrix diagonals when performing trsm. When pre-inversion"
	echo " is enabled, diagonal elements are inverted outside of the"
	echo " microkernel (e.g. during packing) so that the microkernel"
	echo " can use multiply instructions. When disabled, division"
	echo " instructions are used within the microkernel. Executing"
	echo " these division instructions within the microkernel will"
	echo " incur a performance penalty, but numerical robustness will"
	echo " improve for certain cases involving denormal numbers that"
	echo " would otherwise result in overflow in the pre-inverted"
	echo " values."
	echo " "
	echo " --force-version=STRING"
	echo " "
	echo " Force configure to use an arbitrary version string"
	echo " STRING. This option may be useful when repackaging"
	echo " custom versions of BLIS by outside organizations."
	echo " "
	echo " -c, --show-config-lists"
	echo " "
	echo " Print the config and kernel lists, and kernel-to-config"
	echo " map after they are read from file. This can be useful"
	echo " when debugging certain configuration issues, and/or as"
	echo " a sanity check to make sure these lists are constituted"
	echo " as expected."
	echo " "
	echo " --complex-return=gnu|intel"
	echo " "
	echo " Specify the way in which complex numbers are returned"
	echo " from Fortran functions, either \"gnu\" (return in"
	echo " registers) or \"intel\" (return via hidden argument)."
	echo " If not specified and the environment variable FC is set,"
	echo " attempt to determine the return type from the compiler."
	echo " Otherwise, the default is \"gnu\"."
	echo " "
	echo " --enable-aocl-dynamic, --disable-aocl-dynamic"
	echo " "
	echo " Disable (Enabled by default) dynamic selection of number of"
	echo " threads used to solve the given problem."
	echo " Range of optimum number of threads will be [1, num_threads],"
	echo " where \"num_threads\" is number of threads set by the application."
	echo " Num_threads is derived from either environment variable"
	echo " OMP_NUM_THREADS or BLIS_NUM_THREADS or bli_set_num_threads() API."
	echo " "
	echo " --enable-blis-arch-type, --disable-blis-arch-type"
	echo " "
	echo " Disable (Enabled by default) support for BLIS_ARCH_TYPE"
	echo " environment variable, which allows user to select"
	echo " architecture-specific code path at runtime."
	echo " If disabled, in builds with multiple code paths, BLIS"
	echo " will still select path automatically."
	echo " "
	echo " --rename-blis-arch-type=STRING"
	echo " "
	echo " Change environment variable used to select architecture-specific"
	echo " code path from BLIS_ARCH_TYPE to STRING"
	echo " "
	echo " -q, --quiet Suppress informational output. By default, configure"
	echo " is verbose. (NOTE: -q is not yet implemented)"
	echo " "
	echo " -h, --help Output this information and quit."
	echo " "
	echo " Environment Variables:"
	echo " "
	echo " CC Specifies the C compiler to use."
	echo " CXX Specifies the C++ compiler to use (sandbox only)."
	echo " FC Specifies the Fortran compiler to use (only to determine --complex-return)."
	echo " RANLIB Specifies the ranlib executable to use."
	echo " AR Specifies the archiver to use."
	echo " CFLAGS Specifies additional compiler flags to use (prepended)."
	echo " LDFLAGS Specifies additional linker flags to use (prepended)."
	echo " LIBPTHREAD Pthreads library to use."
	echo " PYTHON Specifies the python interpreter to use."
	echo " "
	echo " Environment variables may also be specified as command line"
	echo " options, e.g.:"
	echo " "
	echo " ./configure [options] CC=gcc haswell"
	echo " "
	echo " Note that not all compilers are compatible with a given"
	echo " configuration."
	echo " "

	# Exit with non-zero exit status
	exit 1
}
|
|
|
|
query_array()
{
	# Look up a value in an emulated associative array. Entries are stored
	# in ordinary shell variables named "<array>_<key>".
	#
	# Arguments:
	#   $1 - array name (the variable-name prefix)
	#   $2 - key
	# Output:
	#   Writes the value of the variable "<$1>_<$2>" to stdout, followed by
	#   a newline. An unset variable yields an empty line.
	local arr key var_name

	arr="$1"
	key="$2"

	var_name="${arr}_${key}"

	# Use printf rather than echo so that a stored value such as "-n" or
	# "-e" is printed verbatim instead of being consumed as an echo option.
	printf '%s\n' "${!var_name}"
}
|
|
|
|
assign_key_value()
{
	# Store a value in an emulated associative array. The entry is kept in
	# an ordinary shell variable named "<array>_<key>", which is the naming
	# scheme query_array() reads back.
	#
	# Arguments:
	#   $1 - array name (the variable-name prefix)
	#   $2 - key
	#   $3 - value to store
	local arr key val

	arr="$1"
	key="$2"
	val="$3"

	# printf -v assigns to the named variable without eval; the fixed '%s'
	# format stores the value verbatim even if it contains '%' or '\'.
	printf -v "${arr}_${key}" '%s' "${val}"
}
|
|
|
|
#
# FGVZ: This commented-out function is being kept as an example of how
# to effectively "pass by reference" in bash. That is, pass the name of
# a variable, instead of its contents, and then let the function use the
# variable by prepending a $, at which time it can evaluate the string
# as if it were a literal variable occurrence.
#
|
|
#filteradd_to_list()
|
|
#{
|
|
# local dlist ditem list_c item_c is_blacklisted
|
|
#
|
|
# # Add $1 to the list identified by $2, but only if $1 is not
|
|
# # found in a blacklist.
|
|
#
|
|
# # Note: $2 can actually be a list of items.
|
|
# dlist=\$"$1"
|
|
# ditem=\$"$2"
|
|
#
|
|
# # Acquire the contents of $list and $item and store them in list_c
|
|
# # and item_c, respectively.
|
|
# list_c=$(eval "expr \"$dlist\" ")
|
|
# item_c=$(eval "expr \"$ditem\" ")
|
|
#
|
|
# # Iterate over $item_c in case it is actually multiple items.
|
|
# for cur_item in $item_c; do
|
|
#
|
|
# is_blacklisted=$(is_in_list "${cur_item}" "${config_blist}")
|
|
# if [ ${is_blacklisted} == "false" ]; then
|
|
#
|
|
# # If cur_item is not blacklisted, add it to list_c.
|
|
# list_c="${list_c} ${cur_item}"
|
|
# fi
|
|
# done
|
|
#
|
|
# # Update the argument.
|
|
# eval "$1=\"${list_c}\""
|
|
#}
|
|
|
|
pass_config_kernel_registries()
{
	# Parse a configuration registry file and populate the emulated
	# associative arrays "config_registry" and "kernel_registry".
	#
	# Arguments:
	#   $1 - the file containing the configuration registry.
	#   $2 - the pass number: 0 or 1. Pass 0 was designed to build the
	#        indirect config blacklist (indirect_blist) ONLY; it currently
	#        just resets indirect_blist and returns (see below). Pass 1
	#        populates the config and kernel registries.
	# Globals read:
	#   config_blist   - list of blacklisted configurations.
	#   indirect_blist - list of indirectly blacklisted configurations.
	# Globals written:
	#   indirect_blist - reset to the empty string during pass 0.
	#   config_registry_<name>, kernel_registry_<name> - via
	#   assign_key_value during pass 1.
	# Helpers used (defined elsewhere in this file): canonicalize_ws,
	#   is_singleton_family, is_in_list, remove_from_list, assign_key_value.
	#   The comparisons below expect is_singleton_family and is_in_list to
	#   print "true" or "false".
	local filename passnum
	local all_blist
	local line curline list item config kernels
	local cname clist klist

	filename="$1"
	passnum="$2"

	# Initialize a list of indirect blacklisted configurations for the
	# current iteration. These are configurations that are invalidated by
	# the removal of blacklisted configurations. For example, if haswell
	# is registered as needing the 'haswell' and 'zen' kernel sets:
	#
	#   haswell: haswell/haswell/zen
	#
	# and 'zen' was blacklisted because of the compiler version, then the
	# 'haswell' configuration must be omitted from the registry, as it no
	# longer has all of the kernel sets it was expecting.
	if [ "${passnum}" == "0" ]; then
		indirect_blist=""
	fi

	# For convenience, merge the original and indirect blacklists.
	# NOTE: During pass 0, all_blist is equal to config_blist, since
	# indirect_blist is still empty.
	all_blist="${config_blist} ${indirect_blist}"

	# Disable support for indirect blacklisting by returning early during
	# pass 0. See issue #214 for details [1]. Basically, I realized that
	# indirect blacklisting is not needed in the use case that I envisioned
	# in the real-life example above. If a subconfiguration such as haswell
	# is defined to require the zen kernel set, it implies that the zen
	# kernels can be compiled with haswell compiler flags. That is, just
	# because the zen subconfig (and its compiler flags) is blacklisted
	# does not mean that the haswell subconfig cannot compile the zen
	# kernels with haswell-specific flags.
	#
	# [1] https://github.com/flame/blis/issues/214
	#
	# NOTE: Because of this early return, every 'passnum == 0' branch
	# further down is currently unreachable. Those branches are kept so
	# that indirect blacklisting can be re-enabled by removing this return.
	if [ "${passnum}" == "0" ]; then
		return
	fi

	# read (with the default IFS) strips leading and trailing whitespace
	# from each line. The '|| [ -n "${line}" ]' clause makes sure a final
	# line that is not terminated by a newline is still processed rather
	# than silently dropped.
	while read -r line || [ -n "${line}" ]; do

		curline="${line}"

		# Remove everything after comment character '#'.
		curline=${curline%%#*}

		# We've stripped out leading whitespace and trailing comments. If
		# the line is now empty, then we can skip it altogether.
		if [ "x${curline}" = "x" ]; then
			continue
		fi

		# Read the config name and config list for the current line.
		cname=${curline%%:*}
		list=${curline##*:}

		# If we encounter a slash, it means the name of the configuration
		# and the kernel set needed by that configuration are different.
		if [[ "${list}" == *[/]* ]]; then

			#echo "Slash found."
			klist=""
			clist=""

			# NOTE: "${list}" is quoted, so this loop body executes exactly
			# once with item equal to the entire list.
			for item in "${list}"; do

				# The sub-configuration name is always the first sub-word in
				# the slash-separated compound word.
				config=${item%%/*}

				# Delete the sub-configuration name from the front of the
				# string, leaving the slash-separated kernel names (or just
				# the kernel name, if there is only one).
				kernels=${list#*/}

				# Replace the slashes with spaces to transform the string
				# into a space-separated list of kernel names. This is done
				# with parameter expansion so that no subshell is spawned and
				# the names are never subject to pathname expansion.
				kernels=${kernels//\// }

				clist="${clist} ${config}"
				klist="${klist} ${kernels}"
			done
		else

			#echo "Slash not found."
			clist=${list}
			klist=${list}
		fi

		# Strip out whitespace from the config name and config/kernel list
		# on each line.
		cname=$(canonicalize_ws "${cname}")
		clist=$(canonicalize_ws "${clist}")
		klist=$(canonicalize_ws "${klist}")

		# Next, we prepare to:
		# - pass 0: inspect klist for blacklisted configurations, which may
		#   reveal configurations as needing to be indirectly blacklisted.
		# - pass 1: compare cname to the blacklists and commit clist/klist
		#   to their respective registries, as appropriate.

		# Handle singleton and umbrella configuration entries separately.
		# The command substitutions are quoted so that the test remains
		# well-formed even if a helper prints nothing.
		if [ "$(is_singleton_family "${cname}" "${clist}")" == "true" ]; then

			# Singleton configurations/families.
			# Note: for singleton families, clist contains one item, which
			# always equals cname, but klist could contain more than one
			# item.

			# Only consider updating the indirect blacklist (pass 0) or
			# committing clist and klist to the registries (pass 1) if the
			# configuration name (cname) is not blacklisted.
			if [ "$(is_in_list "${cname}" "${all_blist}")" == "false" ]; then

				if [ "${passnum}" == "0" ]; then
					# Even if the cname isn't blacklisted, one of the requisite
					# kernels might be, so we need to check klist for blacklisted
					# items. If we find one, we must assume that the entire entry
					# must be thrown out. (Ideally, we would simply fall back to
					# reference code for the blacklisted kernels, but that is not
					# at all straightforward under the current configuration
					# system architecture.) Thus, we add cname to the indirect
					# blacklist.
					for item in ${klist}; do
						if [ "$(is_in_list "${item}" "${config_blist}")" == "true" ]; then
							indirect_blist="${indirect_blist} ${cname}"
							break
						fi
					done
				fi

				if [ "${passnum}" == "1" ]; then
					# Store the clist to the cname key of the config registry.
					#config_registry[${cname}]=${clist}
					#printf -v "config_registry_${cname}" %s "${clist}"
					assign_key_value "config_registry" "${cname}" "${clist}"
				fi
			fi

			# Note that the kernel registry entry is stored regardless of
			# whether cname is blacklisted.
			if [ "${passnum}" == "1" ]; then
				# Store the klist to the cname key of the kernel registry.
				#kernel_registry[${cname}]=${klist}
				#printf -v "kernel_registry_${cname}" %s "${klist}"
				assign_key_value "kernel_registry" "${cname}" "${klist}"
			fi

		else
			# Umbrella configurations/families.

			# First we check cname, which should generally not be blacklisted
			# for umbrella families, but we check anyway just to be safe.
			if [ "$(is_in_list "${cname}" "${all_blist}")" == "false" ]; then

				if [ "${passnum}" == "1" ]; then

					# Check each item in the clist and klist. (At this point,
					# clist == klist.) If any sub-config is blacklisted, we
					# omit it from clist and klist.
					for item in ${clist}; do

						if [ "$(is_in_list "${item}" "${all_blist}")" == "true" ]; then
							clist=$(remove_from_list "${item}" "${clist}")
							klist=$(remove_from_list "${item}" "${klist}")
						fi
					done

					# Store the config and kernel lists to entries that
					# correspond to the config name.
					#config_registry[${cname}]=${clist}
					#kernel_registry[${cname}]=${klist}
					#printf -v "config_registry_${cname}" %s "${clist}"
					#printf -v "kernel_registry_${cname}" %s "${klist}"
					assign_key_value "config_registry" "${cname}" "${clist}"
					assign_key_value "kernel_registry" "${cname}" "${klist}"
				fi
			fi
		fi

	done < "${filename}"

	if [ "${passnum}" == "0" ]; then
		# Assign the final indirect blacklist (with whitespace removed).
		# The list is passed as a single quoted argument so that the whole
		# list, not just its first word, reaches canonicalize_ws.
		indirect_blist="$(canonicalize_ws "${indirect_blist}")"
	fi
}
|
|
|
|
read_registry_file()
|
|
{
|
|
local filename
|
|
local clist klist
|
|
local iterate_again config
|
|
local cr_var mem mems_mem newclist
|
|
local kr_var ker kers_ker newklist
|
|
|
|
filename="$1"
|
|
|
|
# Execute an initial pass through the config_registry file so that
|
|
# we can accumulate a list of indirectly blacklisted configurations,
|
|
# if any.
|
|
pass_config_kernel_registries "${filename}" "0"
|
|
|
|
# Now that the indirect_blist has been created, make a second pass
|
|
# through the 'config_registry' file, this time creating the actual
|
|
# config and kernel registry data structures.
|
|
pass_config_kernel_registries "${filename}" "1"
|
|
|
|
# Now we must go back through the config_registry and subsitute any
|
|
# configuration families with their constituents' members. Each time
|
|
# one of these substitutions occurs, we set a flag that causes us to
|
|
# make one more pass. (Subsituting a singleton definition does not
|
|
# prompt additional iterations.) This process stops when a full pass
|
|
# does not result in any subsitution.
|
|
|
|
iterate_again="1"
|
|
while [ "${iterate_again}" == "1" ]; do
|
|
|
|
iterate_again="0"
|
|
|
|
#for config in "${!config_registry[@]}"; do
|
|
for cr_var in ${!config_registry_*}; do
|
|
|
|
config=${cr_var##config_registry_}
|
|
|
|
clist=$(query_array "config_registry" ${config})
|
|
|
|
# The entries that define singleton families should never need
|
|
# any substitution.
|
|
if [ $(is_singleton_family "${config}" "${clist}") == "true" ]; then
|
|
continue
|
|
fi
|
|
|
|
#for mem in ${config_registry[$config]}; do
|
|
#for mem in ${!cr_var}; do
|
|
for mem in ${clist}; do
|
|
|
|
#mems_mem="${config_registry[${mem}]}"
|
|
mems_mem=$(query_array "config_registry" ${mem})
|
|
|
|
# If mems_mem is empty string, then mem was not found as a key
|
|
# in the config list associative array. In that case, we continue
|
|
# and will echo an error later in the script.
|
|
if [ "${mems_mem}" == "" ]; then
|
|
#echo " config for ${mem} is empty string! no entry in config list."
|
|
continue;
|
|
fi
|
|
|
|
if [ "${mem}" != "${mems_mem}" ]; then
|
|
|
|
#clist="${config_registry[$config]}"
|
|
clist=$(query_array "config_registry" ${config})
|
|
|
|
# Replace the current config with its constituent config set,
|
|
# canonicalize whitespace, and then remove duplicate config
|
|
# set names, if they exist. Finally, update the config registry
|
|
# with the new config list.
|
|
newclist=$(echo -e "${clist}" | sed -e "s/${mem}/${mems_mem}/g")
|
|
newclist=$(canonicalize_ws "${newclist}")
|
|
newclist=$(rm_duplicate_words "${newclist}")
|
|
|
|
#config_registry[${config}]=${newclist}
|
|
#printf -v "config_registry_${config}" %s "${newclist}"
|
|
assign_key_value "config_registry" "${config}" "${newclist}"
|
|
|
|
# Since we performed a substitution and changed the config
|
|
# list, mark the iteration flag to continue another round,
|
|
# but only if the config (mem) value is NOT present
|
|
# in the list of sub-configs. If it is present, then further
|
|
# substitution may not necessarily be needed this round.
|
|
if [ $(is_in_list "${mem}" "${mems_mem}") == "false" ]; then
|
|
iterate_again="1"
|
|
fi
|
|
fi
|
|
done
|
|
done
|
|
done
|
|
|
|
# Similar to what we just did for the config_registry, we now iterate
|
|
# through the kernel_registry and substitute any configuration families
|
|
# in the kernel list (right side of ':') with the members of that
|
|
# family's kernel set. This process continues iteratively, as before,
|
|
# until all families have been replaced with singleton configurations'
|
|
# kernel sets.
|
|
|
|
iterate_again="1"
|
|
while [ "${iterate_again}" == "1" ]; do
|
|
|
|
iterate_again="0"
|
|
|
|
#for config in "${!kernel_registry[@]}"; do
|
|
for kr_var in ${!kernel_registry_*}; do
|
|
|
|
config=${kr_var##kernel_registry_}
|
|
|
|
klist=$(query_array "kernel_registry" ${config})
|
|
|
|
# The entries that define singleton families should never need
|
|
# any substitution. In the kernel registry, we know it's a
|
|
# singleton entry when the cname occurs somewhere in the klist.
|
|
# (This is slightly different than the same test in the config
|
|
# registry, where we test that clist is one word and that
|
|
# clist == cname.)
|
|
if [ $(is_in_list "${config}" "${klist}") == "true" ]; then
|
|
#echo "debug: '${config}' not found in '${klist}'; skipping."
|
|
continue
|
|
fi
|
|
|
|
#for ker in ${kernel_registry[$config]}; do
|
|
#for ker in ${!kr_var}; do
|
|
for ker in ${klist}; do
|
|
|
|
#kers_ker="${kernel_registry[${ker}]}"
|
|
kers_ker=$(query_array "kernel_registry" ${ker})
|
|
|
|
# If kers_ker is empty string, then ker was not found as a key
|
|
# in the kernel registry. While not common, this can happen
|
|
# when ker identifies a kernel set that does not correspond to
|
|
# any configuration. (Example: armv7a and armv8a kernel sets are
|
|
# used by cortexa* configurations, but do not correspond to their
|
|
# own configurations.)
|
|
if [ "${kers_ker}" == "" ]; then
|
|
#echo "debug: ${ker} not found in kernel registry."
|
|
continue
|
|
fi
|
|
|
|
# If the current config/kernel (ker) differs from its singleton kernel
|
|
# entry (kers_ker), then that singleton entry was specified to use
|
|
# a different configuration's kernel set. Thus, we need to replace the
|
|
# occurrence in the current config/kernel name with that of the kernel
|
|
# set it needs.
|
|
if [ "${ker}" != "${kers_ker}" ]; then
|
|
|
|
#klisttmp="${kernel_registry[$config]}"
|
|
klisttmp=$(query_array "kernel_registry" ${config})
|
|
|
|
# Replace the current config with its requisite kernels,
|
|
# canonicalize whitespace, and then remove duplicate kernel
|
|
# set names, if they exist. Finally, update the kernel registry
|
|
# with the new kernel list.
|
|
newklist=$(echo -e "${klisttmp}" | sed -e "s/${ker}/${kers_ker}/g")
|
|
newklist=$(canonicalize_ws "${newklist}")
|
|
newklist=$(rm_duplicate_words "${newklist}")
|
|
|
|
#kernel_registry[${config}]=${newklist}
|
|
#printf -v "kernel_registry_${config}" %s "${newklist}"
|
|
assign_key_value "kernel_registry" "${config}" "${newklist}"
|
|
|
|
# Since we performed a substitution and changed the kernel
|
|
# list, mark the iteration flag to continue another round,
|
|
# unless we just substituted using a singleton family
|
|
# definition, in which case we don't necessarily need to
|
|
# iterate further this round.
|
|
if [ $(is_in_list "${ker}" "${kers_ker}") == "false" ]; then
|
|
iterate_again="1"
|
|
fi
|
|
fi
|
|
done
|
|
done
|
|
done
|
|
}
|
|
|
|
build_kconfig_registry()
{
	# Populate the "kconfig_registry" pseudo-array for one configuration
	# family. For every sub-configuration registered under the family, and
	# for every kernel set registered for that sub-configuration, the
	# sub-configuration name is appended to the kconfig_registry entry
	# keyed by the kernel set.
	#
	# Arguments: $1 - name of the configuration family to process.
	local familyname members config ksets kernel newvalue

	familyname="$1"

	# The sub-configurations that make up the family.
	members=$(query_array "config_registry" ${familyname})

	for config in ${members}; do

		# The kernel sets needed by this sub-configuration.
		ksets=$(query_array "kernel_registry" ${config})

		for kernel in ${ksets}; do

			# Fetch whatever is already recorded for this kernel set,
			# append the current sub-configuration, and tidy up the
			# whitespace before storing the list back.
			newvalue=$(canonicalize_ws "$(query_array "kconfig_registry" ${kernel}) ${config}")

			assign_key_value "kconfig_registry" "${kernel}" "${newvalue}"

		done

	done
}
|
|
|
|
is_in_list()
{
	# Report whether a word occurs in a whitespace-separated list.
	#
	# Arguments: $1 - the word to look for.
	#            $2 - the list to search.
	# Outputs:   "true" if some item of the list equals the word exactly,
	#            "false" otherwise.
	local needle haystack candidate

	needle="$1"
	haystack="$2"

	# The list is deliberately left unquoted so that it is split into
	# individual items.
	for candidate in ${haystack}; do

		if [ "${candidate}" == "${needle}" ]; then
			echo "true"
			return
		fi
	done

	echo "false"
}
|
|
|
|
is_singleton()
{
	# Report whether a whitespace-separated list holds exactly one item.
	#
	# Arguments: $1 - the list to inspect.
	# Outputs:   "true" if the list contains exactly one word, "false" if
	#            it is empty or contains two or more words.
	local words nwords word

	words="$1"

	# Count the items by walking the (intentionally unquoted) list.
	nwords=0
	for word in ${words}; do
		nwords=$(( nwords + 1 ))
	done

	if [ "${nwords}" -eq 1 ]; then
		echo "true"
	else
		echo "false"
	fi
}
|
|
|
|
is_singleton_family()
{
	# Report whether a family is a "singleton family", i.e. its member
	# list consists of exactly one item and that item is the family's own
	# name.
	#
	# Arguments: $1 - the family name.
	#            $2 - the family's member list.
	# Outputs:   "true" or "false".
	local familyname memberlist rval

	familyname="$1"
	memberlist="$2"

	rval="false"

	# NOTE: is_singleton reports its answer as the text "true"/"false", so
	# the text must be compared explicitly. A bare `[ $(is_singleton ...) ]`
	# only checks that the output is non-empty and is therefore satisfied
	# by "false" as well.
	if [ "$(is_singleton "${memberlist}")" == "true" ]; then

		if [ "${memberlist}" == "${familyname}" ]; then
			rval="true"
		fi
	fi

	echo "${rval}"
}
|
|
|
|
remove_from_list()
{
	# Filter a whitespace-separated list, dropping every item that matches
	# one of the given "strike" words.
	#
	# Arguments: $1 - whitespace-separated words to remove.
	#            $2 - the list to filter.
	# Outputs:   the filtered list, in its original order, with whitespace
	#            canonicalized.
	#
	# NOTE: strike_words is declared local here (the declaration previously
	# named a different, unused variable), so the function no longer
	# creates or overwrites a global of that name.
	local strike_words list flist item

	strike_words="$1"
	list="$2"
	flist=""

	for item in ${list}; do

		# Keep only the items that do not match any of the strike words.
		if [ "$(is_in_list "${item}" "${strike_words}")" == "false" ]; then
			flist="${flist} ${item}"
		fi
	done

	flist=$(canonicalize_ws "${flist}")

	# Return the filtered list.
	echo "${flist}"
}
|
|
|
|
canonicalize_ws()
{
	# Normalize the whitespace of a string: strip leading and trailing
	# whitespace, then squeeze runs of spaces down to a single space.
	#
	# Arguments: $1 - the string to normalize.
	# Outputs:   the normalized string.
	local text

	text="$1"

	# Stage 1: trim whitespace from both ends.
	text=$(echo -e "${text}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')

	# Stage 2: collapse repeated spaces between words.
	text=$(echo -e "${text}" | tr -s " ")

	# Hand the result back to the caller via stdout.
	echo "${text}"
}
|
|
|
|
rm_duplicate_words_simple()
{
	# Remove duplicate words from a string, keeping the FIRST occurrence
	# of each word.
	#
	# Arguments: $1 - whitespace-separated words.
	# Outputs:   the de-duplicated words; each kept word is followed by a
	#            single space (so the output carries a trailing space).
	local words unique

	words="$1"

	# awk remembers every word it has printed and skips repeats.
	unique=$(echo "${words}" | awk '{
		for (i = 1; i <= NF; i++)
			if (!seen[$i]++)
				printf("%s%s", $i, FS)
		printf("\n")
	}')

	echo "${unique}"
}
|
|
|
|
rm_duplicate_words()
{
	# Remove duplicate words from a string, keeping the LAST occurrence of
	# each word (rather than the first) while preserving the relative
	# order of the survivors. This suits the duplicates that arise in the
	# config and kernel registries; for example, a configuration family
	# that lists 'generic' two or more times keeps only the final one.
	#
	# Arguments: $1 - whitespace-separated words.
	# Outputs:   the de-duplicated words, separated by single spaces.
	local str res

	str="$1"

	# Scan the words from right to left so that the first sighting of a
	# word is its last occurrence; collect those in kept[], which is then
	# in reverse order, and emit kept[] backwards to restore the original
	# ordering.
	res=$(echo "${str}" | awk '{
		n = 0
		for (i = NF; i >= 1; i--)
			if (!seen[$i]++)
				kept[++n] = $i
		line = ""
		for (j = n; j >= 1; j--)
			line = line (j < n ? " " : "") kept[j]
		print line
	}')

	echo "${res}"
}
|
|
|
|
get_cc_search_list()
{
	# Echo the ordered list of C compilers to probe for. The order depends
	# on the global ${os_name}.
	local list

	case "${os_name}" in
		# OpenBSD and FreeBSD: prefer cc and clang over gcc.
		OpenBSD|FreeBSD)
			list="cc clang gcc"
			;;
		# Linux, Darwin (OS X), and everything else: prefer gcc.
		*)
			list="gcc clang cc"
			;;
	esac

	echo "${list}"
}
|
|
|
|
get_cxx_search_list()
{
	# Echo the ordered list of C++ compilers to probe for. The order
	# depends on the global ${os_name}.
	local list

	case "${os_name}" in
		# OpenBSD and FreeBSD: prefer c++ and clang++ over g++.
		OpenBSD|FreeBSD)
			list="c++ clang++ g++"
			;;
		# Linux, Darwin (OS X), and everything else: prefer g++.
		*)
			list="g++ clang++ c++"
			;;
	esac

	echo "${list}"
}
|
|
|
|
select_tool()
{
	# Pick the first usable compiler/tool from a list of candidates.
	#
	# Arguments: $1 - whitespace-separated candidates, in search order.
	#            $2 - value of the environment variable associated with
	#                 this kind of tool (e.g. CC, CXX, PYTHON); when
	#                 non-empty it is tried before the candidates in $1.
	# Outputs:   the first candidate for which '<candidate> --version'
	#            succeeds, or an empty string if none does.
	local candidates preferred chosen tool

	candidates=$1
	preferred=$2

	# A user-supplied choice goes to the front of the queue.
	[ -z "${preferred}" ] || candidates="${preferred} ${candidates}"

	# Nothing has been selected yet.
	chosen=""

	for tool in ${candidates}; do

		# A candidate counts as working/present if it can report its
		# version.
		if ${tool} --version > /dev/null 2>&1; then
			chosen=${tool}
			break
		fi
	done

	# Return the selected compiler/tool.
	echo "${chosen}"
}
|
|
|
|
auto_detect()
{
	# Build and run a small program out of framework sources in order to
	# detect the sub-configuration to use, and echo the detected name.
	# When ${debug_auto_detect} is anything other than "no", the compile
	# command is echoed instead of being executed.
	#
	# Globals read:    found_cc, dist_path, is_win, cc_vendor, LIBPTHREAD,
	#                  enable_system, rename_blis_arch_type,
	#                  debug_auto_detect.
	# Globals written: the non-local work variables below (c_src_pairs,
	#                  c_src_filepaths, c_hdr_pairs, c_hdr_paths, pair,
	#                  filename, rootdir, filepath, path, autodetect_x,
	#                  bli_cpuid_c_filepath, ldflags, double_quote_open,
	#                  double_quote_close).
	local cc cflags config_defines detected_config rval cmd

	# Use the same compiler that was found earlier.
	cc="${found_cc}"

	# For debugging: reveal what compiler was chosen for auto-detection.
	#touch "${cc}.txt"

	# Compiler-specific flags; mostly a chance to silence warnings that
	# some compilers emit.
	case "${cc}" in
		clang) cflags="-Wno-tautological-compare" ;;
		*)     cflags= ;;
	esac

	# The source files to compile, each written as <rootdir>:<filename>,
	# where <rootdir> is the top-level directory under which the file is
	# searched for.
	c_src_pairs=""
	for pair in frame:bli_arch.c \
	            frame:bli_cpuid.c \
	            frame:bli_env.c \
	            build:config_detect.c; do
		c_src_pairs="${c_src_pairs} ${pair}"
	done

	# Resolve each source pair to a full filepath.
	c_src_filepaths=""
	for pair in ${c_src_pairs}; do

		rootdir=${pair%:*}
		filename=${pair#*:}

		filepath=$(find ${dist_path}/${rootdir} -name "${filename}")
		c_src_filepaths="${c_src_filepaths} ${filepath}"
	done

	# The header files that must be locatable, in the same
	# <rootdir>:<filename> form. bli_malloc.h and bli_pthread.h are listed
	# because bli_type_defs.h needs them.
	c_hdr_pairs=""
	for pair in frame:bli_system.h \
	            frame:bli_type_defs.h \
	            frame:bli_arch.h \
	            frame:bli_cpuid.h \
	            frame:bli_env.h \
	            frame:bli_malloc.h \
	            frame:bli_pthread.h; do
		c_hdr_pairs="${c_hdr_pairs} ${pair}"
	done

	# Resolve each header to the directory containing it and turn that
	# directory into a "-I" option for the compiler.
	c_hdr_paths=""
	for pair in ${c_hdr_pairs}; do

		rootdir=${pair%:*}
		filename=${pair#*:}

		filepath=$(find ${dist_path}/${rootdir} -name "${filename}")
		path=${filepath%/*}
		c_hdr_paths="${c_hdr_paths} -I${path}"
	done

	# Name of the executable to build.
	autodetect_x="auto-detect.x"

	# Turn every "#ifdef BLIS_CONFIG_..." line of bli_cpuid.c into a
	# "-DBLIS_CONFIG_..." option.
	# NOTE: the result holds one option per line; the unquoted expansion
	# of ${cmd} further below is what joins those lines into one command.
	bli_cpuid_c_filepath=$(find ${dist_path}/frame -name "bli_cpuid.c")
	config_defines=$(grep BLIS_CONFIG_ ${bli_cpuid_c_filepath} \
	                 | sed -e 's/#ifdef /-D/g')

	# Linker flags. pthreads (or BLIS's homerolled equivalent) is
	# typically needed by parts of bli_arch.c. The flag is set unless we
	# are on Windows with clang.
	if ! [[ "$is_win" != "no" && "$cc_vendor" == "clang" ]]; then
		ldflags="${LIBPTHREAD--lpthread}"
	fi

	# --disable-system overrides the choice above: no pthread link flags.
	if [[ "$enable_system" == "no" ]]; then
		ldflags=
	fi

	# Assemble the compile command.
	# NOTE: -D_GNU_SOURCE is needed to enable POSIX extensions to
	# pthreads (i.e., barriers).
	# The two quote helpers wrap ${rename_blis_arch_type} in escaped
	# double quotes so that, after eval, the macro is defined as a string
	# literal.
	double_quote_open=\"\\\"
	double_quote_close=\\\"\"
	cmd="${cc} ${config_defines} \
	     -DBLIS_CONFIGURETIME_CPUID \
	     -D__blis_arch_type_name=${double_quote_open}${rename_blis_arch_type}${double_quote_close} \
	     ${c_hdr_paths} \
	     -std=c99 -D_GNU_SOURCE \
	     ${cflags} \
	     ${c_src_filepaths} \
	     ${ldflags} \
	     -o ${autodetect_x}"

	if [ "${debug_auto_detect}" != "no" ]; then

		# Debugging: rather than executing ${cmd}, join its lines with tr,
		# trim excess whitespace via awk, print it, and stop here.
		cmd=$(echo "${cmd}" | tr '\n' ' ' | awk '{$1=$1;print}')
		echo "${cmd}"
		return
	fi

	# Execute the compilation command. ${cmd} is intentionally unquoted.
	eval ${cmd}

	# Run the auto-detect program.
	detected_config=$(./${autodetect_x})

	# Remove the executable file.
	rm -f ./${autodetect_x}

	# Return the detected sub-configuration name.
	echo "${detected_config}"
}
|
|
|
|
has_libmemkind()
{
	# Probe whether a program can be compiled and linked against
	# libmemkind with ${found_cc}.
	#
	# Outputs: "yes" if the probe builds, "no" otherwise.
	local main_c main_c_filepath LDFLAGS_mk binname rval

	# Locate the libmemkind detection source file.
	main_c="libmemkind_detect.c"
	main_c_filepath=$(find ${dist_path}/build -name "${main_c}")

	# Link against libmemkind in addition to the user's LDFLAGS.
	LDFLAGS_mk="${LDFLAGS} -lmemkind"

	# Name of the throwaway executable.
	binname="libmemkind-detect.x"

	# Try to build the probe; the outcome of the build decides the answer.
	if ${found_cc} -o ${binname} ${main_c_filepath} ${LDFLAGS_mk} 2> /dev/null; then
		rval='yes'
	else
		rval='no'
	fi

	# Remove the executable generated above.
	rm -f ./${binname}

	echo "${rval}"
}
|
|
|
|
has_pragma_omp_simd()
{
	# Probe whether ${found_cc} can compile a program that uses
	# '#pragma omp simd' when given -fopenmp-simd.
	#
	# Outputs: "yes" if the probe builds, "no" otherwise.
	local main_c main_c_filepath binname rval

	# Locate the omp-simd detection source file.
	main_c="omp_simd_detect.c"
	main_c_filepath=$(find ${dist_path}/build -name "${main_c}")

	# Name of the throwaway executable.
	binname="omp_simd-detect.x"

	# Try to build the probe; the outcome of the build decides the answer.
	if ${found_cc} -std=c99 -O3 -march=native -fopenmp-simd \
	               -o ${binname} ${main_c_filepath} 2> /dev/null; then
		rval='yes'
	else
		rval='no'
	fi

	# Remove the executable generated above.
	rm -f ./${binname}

	echo "${rval}"
}
|
|
|
|
echoerr()
{
	# Print an error message prefixed with the script name.
	#
	# Arguments: the message; all arguments are joined with spaces.
	# Outputs:   "<script_name>: error: <message>" on stdout. (The
	#            redirection to stderr is intentionally left disabled.)
	#
	# The script name is passed as an argument rather than embedded in the
	# printf format so that a '%' or backslash in it is printed literally.
	printf '%s: error: %s\n' "${script_name}" "$*" #>&2;
}
|
|
|
|
echowarn()
{
	# Print a warning message prefixed with the script name.
	#
	# Arguments: the message; all arguments are joined with spaces.
	# Outputs:   "<script_name>: warning: <message>" on stdout. (The
	#            redirection to stderr is intentionally left disabled.)
	#
	# The script name is passed as an argument rather than embedded in the
	# printf format so that a '%' or backslash in it is printed literally.
	printf '%s: warning: %s\n' "${script_name}" "$*" #>&2;
}
|
|
|
|
blacklistcc_add()
{
	# Blacklist a sub-configuration that the compiler cannot support.
	#
	# Arguments: $1 - the sub-configuration name.
	# Globals:   config_blist (appended to); cc_vendor and cc_version are
	#            used in the warning message.

	# Nothing to do (and nothing to print) unless the sub-config is not
	# yet on the blacklist; this avoids redundant messages.
	if ! [ $(is_in_list "$1" "${config_blist}") == "false" ]; then
		return
	fi

	echowarn "${cc_vendor} ${cc_version} does not support '$1'; adding to blacklist."
	config_blist="${config_blist} $1"
}
|
|
|
|
blacklistbu_add()
{
	# Blacklist a sub-configuration that the assembler cannot support.
	#
	# Arguments: $1 - the sub-configuration name.
	# Globals:   config_blist (appended to); bu_version is used in the
	#            warning message.

	# Nothing to do (and nothing to print) unless the sub-config is not
	# yet on the blacklist; this avoids redundant messages.
	if ! [ $(is_in_list "$1" "${config_blist}") == "false" ]; then
		return
	fi

	echowarn "assembler ('as' ${bu_version}) does not support '$1'; adding to blacklist."
	config_blist="${config_blist} $1"
}
|
|
|
|
blacklist_init()
{
	# Start with an empty blacklist of sub-configurations.
	config_blist=
}
|
|
|
|
blacklist_cleanup()
{
	# Tidy the blacklist in place: first drop duplicate entries, then
	# canonicalize the whitespace of what remains.
	config_blist=$(canonicalize_ws "$(rm_duplicate_words "${config_blist}")")
}
|
|
|
|
echoerr_unsupportedcc()
{
	# Report that the detected compiler version is unsupported and abort
	# the script with exit status 1.
	local msg

	msg="${script_name}: *** Unsupported compiler version: ${cc_vendor} ${cc_version}."

	echoerr "${msg}"
	exit 1
}
|
|
|
|
echoerr_unsupportedpython()
{
	# Report that the detected python version is unsupported and abort
	# the script with exit status 1.
	local msg

	msg="${script_name}: *** Unsupported python version: ${python_version}."

	echoerr "${msg}"
	exit 1
}
|
|
|
|
get_binutils_version()
{
	# Determine the version of the assembler and publish it through the
	# globals bu_version, bu_major, bu_minor, and bu_revision. A one-line
	# summary is echoed to stdout.
	#
	# Globals read:    AS (assembler to query; defaults to 'as').
	# Globals written: binutil, bu_string, bu_version, bu_major, bu_minor,
	#                  bu_revision.
	binutil=${AS:-as}

	# Query the full binutils version string output. This includes the
	# version string along with (potentially) a bunch of other textual
	# clutter.
	if [ "$(uname -s)" == "Darwin" ]; then
		# The default OS X assembler responds only to '-v', hangs
		# indefinitely if not given an input argument, writes the result
		# to stderr, and creates an 'a.out' by default; hence the
		# /dev/null input, the '-o /dev/null', and the 2>&1.
		bu_string=$(${binutil} -v /dev/null -o /dev/null 2>&1)
	else
		bu_string=$(${binutil} --version 2>/dev/null)
	fi

	# Extract the version number.
	# 'grep -E' is used instead of the obsolescent 'egrep', which recent
	# GNU grep releases flag with a warning on stderr.
	# The last part ({ read first rest ; echo ${first} ; }) keeps only the
	# first match when grep reports several.
	bu_version=$(echo "${bu_string}" | grep -E -o '[0-9]+\.[0-9]+\.?[0-9]*' | { read first rest ; echo ${first} ; })

	# Parse the version number into its major, minor, and revision
	# components.
	bu_major=$(echo "${bu_version}" | cut -d. -f1)
	bu_minor=$(echo "${bu_version}" | cut -d. -f2)
	bu_revision=$(echo "${bu_version}" | cut -d. -f3)

	echo "${script_name}: found assembler ('as') version ${bu_version} (maj: ${bu_major}, min: ${bu_minor}, rev: ${bu_revision})."
}
|
|
|
|
get_python_search_list()
{
	# Echo the ordered list of python interpreters to probe for. On all
	# OSes, plain 'python' is tried first.
	echo "python python3 python2"
}
|
|
|
|
get_python_version()
{
	# Determine the version of the python interpreter in ${found_python}
	# and publish it through the globals python_version, python_major,
	# python_minor, and python_revision. A one-line summary is echoed to
	# stdout.
	local banner

	# Ask the interpreter for its version. The reply contains the version
	# number along with other text, such as "Python ".
	# NOTE: stderr is folded into stdout because python may print its
	# version info on stderr.
	banner="$(${found_python} --version 2>&1)"

	# Discard the leading text, keeping the first number and everything
	# after it.
	python_version=$(echo "${banner}" | sed -e "s/[a-zA-Z_ ]* \([0-9]*\..*\)/\1/g")

	# Split the version into major, minor, and revision components.
	python_major=$(echo "${python_version}" | cut -d. -f1)
	python_minor=$(echo "${python_version}" | cut -d. -f2)
	python_revision=$(echo "${python_version}" | cut -d. -f3)

	echo "${script_name}: found python version ${python_version} (maj: ${python_major}, min: ${python_minor}, rev: ${python_revision})."
}
|
|
|
|
check_python()
{
	# Verify that the python version recorded in python_major and
	# python_minor is supported; abort via echoerr_unsupportedpython
	# if it is not.
	#
	# Python requirements
	#
	#   python1: no versions supported
	#   python2: 2.7+
	#   python3: 3.4+
	#
	# NOTE: It's actually unclear whether python 3.0 through 3.3.x would
	# work. Python 3.5 is the oldest python3 that I have available to test
	# with, and I only know that 3.4 will work thanks to feedback from
	# Dave Love. So it's quite possible that some of those "unsupported"
	# python3 versions are sufficient. -FGVZ
	#

	# Every python 1.x is rejected.
	if [ ${python_major} -eq 1 ]; then
		echoerr_unsupportedpython
	fi

	# Within python 2, anything before 2.7 is rejected.
	if [ ${python_major} -eq 2 ] && [ ${python_minor} -lt 7 ]; then
		echoerr_unsupportedpython
	fi

	# Within python 3, anything before 3.4 is rejected.
	if [ ${python_major} -eq 3 ] && [ ${python_minor} -lt 4 ]; then
		echoerr_unsupportedpython
	fi

	echo "${script_name}: python ${python_version} appears to be supported."
}
|
|
|
|
get_compiler_version()
{
	# Determine the vendor and version of the compiler in ${found_cc} and
	# publish them through the globals cc_vendor, cc_version, cc_major,
	# cc_minor, and cc_revision. A one-line summary is echoed to stdout.
	local cc vendor_string

	cc="${found_cc}"

	# Query the full vendor version string output. This includes the
	# version number along with (potentially) a bunch of other textual
	# clutter.
	# NOTE: This maybe should use merged stdout/stderr rather than only
	# stdout. But it works for now.
	vendor_string="$(${cc} --version 2>/dev/null)"

	# Query the compiler "vendor" (ie: the compiler's simple name).
	# 'grep -E' is used instead of the obsolescent 'egrep', which recent
	# GNU grep releases flag with a warning on stderr.
	# The last part ({ read first rest ; echo $first ; }) keeps only the
	# first match when grep reports several.
	cc_vendor=$(echo "${vendor_string}" | grep -E -o 'icc|gcc|clang|emcc|pnacl|IBM|oneAPI|crosstool-NG' | { read first rest ; echo $first ; })

	if [ "${cc_vendor}" = "crosstool-NG" ]; then
		# Treat compilers built by crosstool-NG (for eg: conda) as gcc.
		cc_vendor="gcc"
	fi

	# Isolate the version number; the method depends on the vendor.
	if [ "${cc_vendor}" = "icc" ] || [ "${cc_vendor}" = "gcc" ]; then
		cc_version=$(${cc} -dumpversion)
	elif [ "${cc_vendor}" = "clang" ]; then
		# If compiler is AOCC, first grep for clang and then the version
		# number.
		cc_version=$(echo "${vendor_string}" | grep -E -o 'clang version [0-9]+\.[0-9]+\.?[0-9]*' | grep -E -o '[0-9]+\.[0-9]+\.?[0-9]*')
	elif [ "${cc_vendor}" = "oneAPI" ]; then
		# Treat Intel oneAPI's clang as clang, not icc.
		cc_vendor="clang"
		cc_version=$(echo "${vendor_string}" | grep -E -o '[0-9]+\.[0-9]+\.[0-9]+\.?[0-9]*' | { read first rest ; echo ${first} ; })
	else
		cc_version=$(echo "${vendor_string}" | grep -E -o '[0-9]+\.[0-9]+\.?[0-9]*' | { read first rest ; echo ${first} ; })
	fi

	# Parse the version number into its major, minor, and revision
	# components.
	cc_major=$(echo "${cc_version}" | cut -d. -f1)
	cc_minor=$(echo "${cc_version}" | cut -d. -f2)
	cc_revision=$(echo "${cc_version}" | cut -d. -f3)

	# gcc 7 introduced new behavior to -dumpversion whereby only the major
	# version component is output. However, as part of this change, gcc 7
	# also introduced a new option, -dumpfullversion, which is guaranteed
	# to always output the major, minor, and revision numbers. Thus, if
	# we're using gcc and its version is 7 or later, we re-query and
	# re-parse the version string.
	# NOTE: the two tests are chained with && so that the numeric
	# comparison is only attempted for gcc; for other vendors cc_major
	# may be empty, which would make a combined '-a' test fail with an
	# error message.
	if [ "${cc_vendor}" = "gcc" ] && [ "${cc_major}" -ge 7 ]; then

		# Re-query the version number using -dumpfullversion.
		cc_version=$(${cc} -dumpfullversion)

		# And parse the result.
		cc_major=$(echo "${cc_version}" | cut -d. -f1)
		cc_minor=$(echo "${cc_version}" | cut -d. -f2)
		cc_revision=$(echo "${cc_version}" | cut -d. -f3)
	fi

	echo "${script_name}: found ${cc_vendor} version ${cc_version} (maj: ${cc_major}, min: ${cc_minor}, rev: ${cc_revision})."
}
|
|
|
|
check_compiler()
{
	# Blacklist the sub-configurations that the detected compiler version
	# cannot build, and abort (via echoerr_unsupportedcc) if the compiler
	# is too old to be used at all.
	#
	# Globals read:    found_cc, cc_vendor, cc_version, cc_major,
	#                  cc_minor, cc_revision.
	# Globals written: config_blist (through blacklistcc_add).
	local cc vendor_string

	cc="${found_cc}"

	#
	# Compiler requirements
	#
	# General:
	#
	#   icc 15+, gcc 4.7+, clang 3.3+
	#
	# Specific:
	#
	#   skx:         icc 15.0.1+, gcc 6.0+, clang 3.9+
	#   knl:         icc 14.0.1+, gcc 5.0+, clang 3.9+
	#   haswell:     any
	#   sandybridge: any
	#   penryn:      any
	#
	#   zen:         gcc 6.0+[1], clang 4.0+
	#   excavator:   gcc 4.9+, clang 3.5+
	#   steamroller: any
	#   piledriver:  any
	#   bulldozer:   any
	#
	#   cortexa57:   any
	#   cortexa15:   any
	#   cortexa9:    any
	#
	#   generic:     any
	#
	# Note: These compiler requirements were originally modeled after
	# similar requirements encoded into TBLIS's configure.ac [2].
	#
	# [1] While gcc 6.0 or newer is needed for zen support
	#     (-march=znver1), we relax this compiler version constraint a bit
	#     by targeting bdver4 and then disabling the instruction sets that
	#     were removed in the transition from bdver4 to znver1. (See
	#     config/zen/make_defs.mk for the specific compiler flags used.)
	# [2] https://github.com/devinamatthews/tblis/
	#

	echo "${script_name}: checking for blacklisted configurations due to ${cc} ${cc_version}."

	# gcc
	if [ "x${cc_vendor}" = "xgcc" ]; then

		if [ ${cc_major} -lt 4 ]; then
			echoerr_unsupportedcc
		fi
		if [ ${cc_major} -eq 4 ]; then
			blacklistcc_add "knl"
			if [ ${cc_minor} -lt 7 ]; then
				echoerr_unsupportedcc
			fi
			if [ ${cc_minor} -lt 9 ]; then
				blacklistcc_add "excavator"
				blacklistcc_add "zen"
			fi
		fi
		if [ ${cc_major} -lt 5 ]; then
			blacklistcc_add "knl"
		fi
		if [ ${cc_major} -lt 6 ]; then
			# Normally, zen would be blacklisted for gcc prior to 6.0.
			# However, we have a workaround in place in the zen
			# configuration's make_defs.mk file that starts with bdver4
			# and disables the instructions that were removed in znver1.
			# Thus, this "blacklistcc_add" statement has been moved above.
			#blacklistcc_add "zen"
			blacklistcc_add "skx"
			# gcc 5.x may support POWER9 but it is unverified.
			blacklistcc_add "power9"
		fi
	fi

	# icc
	if [ "x${cc_vendor}" = "xicc" ]; then

		if [ ${cc_major} -lt 15 ]; then
			echoerr_unsupportedcc
		fi
		if [ ${cc_major} -eq 15 ]; then
			if [ ${cc_revision} -lt 1 ]; then
				blacklistcc_add "skx"
			fi
		fi
		if [ ${cc_major} -eq 18 ]; then
			echo "${script_name}: ${cc} ${cc_version} is known to cause erroneous results. See https://github.com/flame/blis/issues/371 for details."
			blacklistcc_add "knl"
			blacklistcc_add "skx"
		fi
		if [ ${cc_major} -ge 19 ]; then
			echo "${script_name}: ${cc} ${cc_version} is known to cause erroneous results. See https://github.com/flame/blis/issues/371 for details."
			echoerr_unsupportedcc
		fi
	fi

	# clang
	if [ "x${cc_vendor}" = "xclang" ]; then

		# Apple's clang uses its own version numbering, so it must be told
		# apart from upstream clang. The vendor string is queried here
		# because the copy obtained by get_compiler_version is local to
		# that function and is not visible in this one.
		vendor_string="$(${cc} --version 2>/dev/null)"

		if echo "${vendor_string}" | grep -q Apple; then
			if [ ${cc_major} -lt 5 ]; then
				echoerr_unsupportedcc
			fi
			# See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions
			if [ ${cc_major} -eq 5 ]; then
				# Apple clang 5.0 is clang 3.4svn
				blacklistcc_add "excavator"
				blacklistcc_add "zen"
			fi
			if [ ${cc_major} -lt 7 ]; then
				blacklistcc_add "knl"
				blacklistcc_add "skx"
			fi
		else
			if [ ${cc_major} -lt 3 ]; then
				echoerr_unsupportedcc
			fi
			if [ ${cc_major} -eq 3 ]; then
				if [ ${cc_minor} -lt 3 ]; then
					echoerr_unsupportedcc
				fi
				if [ ${cc_minor} -lt 5 ]; then
					blacklistcc_add "excavator"
					blacklistcc_add "zen"
				fi
				if [ ${cc_minor} -lt 9 ]; then
					blacklistcc_add "knl"
					blacklistcc_add "skx"
				fi
			fi
			if [ ${cc_major} -lt 4 ]; then
				# See comment above regarding zen support.
				#blacklistcc_add "zen"
				: # explicit no-op since bash can't handle an empty 'if' body.
			fi
		fi
	fi
}
|
|
|
|
check_compiler_version_ranges()
{
	# Classify the detected compiler version against version ranges that
	# affect how the BLIS build system must drive that compiler. The
	# outcome is published through three global 'yes'/'no' flags.
	local cc

	cc="${found_cc}"

	#
	# range:    gcc < 4.9.0 (ie: 4.8.5 or older)
	# variable: gcc_older_than_4_9_0
	# comments:
	#   These older versions of gcc may support microarchitectures such
	#   as sandybridge, but the '-march=' flag uses a different label
	#   syntax. In newer versions, '-march=sandybridge' is the preferred
	#   syntax [1]. However, in older versions, the syntax for the same
	#   compiler option is '-march=corei7-avx' [2].
	#
	#   [1] https://gcc.gnu.org/onlinedocs/gcc-4.9.0/gcc/i386-and-x86-64-Options.html#i386-and-x86-64-Options
	#   [2] https://gcc.gnu.org/onlinedocs/gcc-4.8.5/gcc/i386-and-x86-64-Options.html#i386-and-x86-64-Options
	#
	# range:    gcc < 6.1 (ie: 5.5 or older)
	# variable: gcc_older_than_6_1_0
	# comments:
	#   These older versions of gcc do not explicitly support the Zen
	#   (Zen1) microarchitecture; the newest microarchitectural value
	#   understood by these versions is '-march=bdver4' [3]. However,
	#   basic support for these older versions can be attained in a
	#   roundabout way by starting with the instruction sets enabled by
	#   '-march=bdver4' and then disabling the instruction sets that were
	#   removed in the transition from Excavator to Zen, namely: FMA4,
	#   TBM, XOP, and LWP. Newer versions of gcc support Zen via the
	#   '-march=znver1' option [4].
	#
	#   [3] https://gcc.gnu.org/onlinedocs/gcc-5.5.0/gcc/x86-Options.html#x86-Options
	#   [4] https://gcc.gnu.org/onlinedocs/gcc-6.1.0/gcc/x86-Options.html#x86-Options
	#
	# range:    gcc < 9.1 (ie: 8.3 or older)
	# variable: gcc_older_than_9_1_0
	# comments:
	#   These older versions of gcc do not explicitly support the Zen2
	#   microarchitecture; the newest microarchitectural value understood
	#   by these versions is either '-march=znver1' (if
	#   !gcc_older_than_6_1_0) [5] or '-march=bdver4' (if
	#   gcc_older_than_6_1_0) [3]. If gcc is 6.1 or newer, '-march=znver1'
	#   may be used (since the instruction sets it enables are a subset
	#   of those enabled by '-march=znver2'); otherwise, '-march=bdver4'
	#   must be used in conjunction with disabling the instruction sets
	#   that were removed in the transition from Excavator to Zen, as
	#   described in the section above for gcc_older_than_6_1_0. Newer
	#   versions of gcc support Zen2 via the '-march=znver2' option [6].
	#
	#   [5] https://gcc.gnu.org/onlinedocs/gcc-8.3.0/gcc/x86-Options.html#x86-Options
	#   [6] https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/x86-Options.html#x86-Options
	#

	# Assume the compiler is outside every range until shown otherwise.
	gcc_older_than_4_9_0='no'
	gcc_older_than_6_1_0='no'
	gcc_older_than_9_1_0='no'

	echo "${script_name}: checking ${cc} ${cc_version} against known consequential version ranges."

	case "x${cc_vendor}" in

		xgcc)
			# gcc < 4.9.0 (ie: 4.8.5 or older).
			if [ ${cc_major} -eq 4 ] && [ ${cc_minor} -lt 9 ]; then
				echo "${script_name}: note: found ${cc} version older than 4.9.0."
				gcc_older_than_4_9_0='yes'
			fi

			# gcc < 6.1.0 (ie: 5.5 or older).
			if [ ${cc_major} -lt 6 ]; then
				echo "${script_name}: note: found ${cc} version older than 6.1."
				gcc_older_than_6_1_0='yes'
			fi

			# gcc < 9.1.0 (ie: 8.3 or older).
			if [ ${cc_major} -lt 9 ]; then
				echo "${script_name}: note: found ${cc} version older than 9.1."
				gcc_older_than_9_1_0='yes'
			fi
			;;

		xicc)
			# No consequential version ranges are tracked for icc.
			:
			;;

		xclang)
			# No consequential version ranges are tracked for clang.
			:
			;;
	esac
}
|
|
|
|
check_assembler()
{
	# Try to assemble a series of small test files, one per instruction
	# set, and blacklist the sub-configurations whose instruction set the
	# toolchain fails to assemble.
	#
	# Globals read:    found_cc, dist_path, bu_version, cc_vendor.
	# Globals written: knows_fma4, knows_avx, knows_fma3, knows_avx512f,
	#                  knows_avx512dq; config_blist (through
	#                  blacklistbu_add).
	local cc asm_dir cflags asm_fp cfg

	cc="${found_cc}"

	# The assembly test files are searched for under this directory.
	asm_dir="${dist_path}/build"

	# Most of the probes need no additional compiler flags.
	cflags=""

	echo "${script_name}: checking for blacklisted configurations due to as ${bu_version}."

	#
	# FMA4 (amd: bulldozer).
	#
	asm_fp=$(find ${asm_dir} -name "fma4.s")
	knows_fma4=$(try_assemble "${cc}" "${cflags}" "${asm_fp}")

	if [ "x${knows_fma4}" == "xno" ]; then
		blacklistbu_add "bulldozer"
	fi

	#
	# AVX (intel: sandybridge+, amd: piledriver+).
	#
	asm_fp=$(find ${asm_dir} -name "avx.s")
	knows_avx=$(try_assemble "${cc}" "${cflags}" "${asm_fp}")

	if [ "x${knows_avx}" == "xno" ]; then
		blacklistbu_add "sandybridge"
	fi

	#
	# FMA3 (intel: haswell+, amd: piledriver+).
	#
	asm_fp=$(find ${asm_dir} -name "fma3.s")
	knows_fma3=$(try_assemble "${cc}" "${cflags}" "${asm_fp}")

	if [ "x${knows_fma3}" == "xno" ]; then
		for cfg in haswell piledriver steamroller excavator skx; do
			blacklistbu_add "${cfg}"
		done
	fi

	#
	# AVX-512f (knl, skx).
	#

	# The assembler on OS X won't recognize AVX-512 without help.
	if [ "${cc_vendor}" == "clang" ]; then
		cflags="-march=knl"
	fi

	asm_fp=$(find ${asm_dir} -name "avx512f.s")
	knows_avx512f=$(try_assemble "${cc}" "${cflags}" "${asm_fp}")

	if [ "x${knows_avx512f}" == "xno" ]; then
		for cfg in knl skx; do
			blacklistbu_add "${cfg}"
		done
	fi

	#
	# AVX-512dq (skx).
	#

	# The assembler on OS X won't recognize AVX-512 without help.
	if [ "${cc_vendor}" == "clang" ]; then
		cflags="-march=skylake-avx512"
	fi

	asm_fp=$(find ${asm_dir} -name "avx512dq.s")
	knows_avx512dq=$(try_assemble "${cc}" "${cflags}" "${asm_fp}")

	if [ "x${knows_avx512dq}" == "xno" ]; then
		blacklistbu_add "skx"
	fi
}
|
|
|
|
try_assemble()
{
	# Attempt to assemble a source file and report whether it worked.
	#
	# Arguments: $1 - compiler (driver) to invoke.
	#            $2 - extra flags to pass to it (may be empty).
	#            $3 - path to the assembly source file.
	# Outputs:   "yes" if the compiler exits successfully, "no" otherwise.
	local cc cflags asm_src asm_bin rval

	cc="$1"
	cflags="$2"
	asm_src="$3"

	# Derive the object filename from the source path: drop the directory
	# part, swap the file extension for ".o". The object file therefore
	# lands in the current directory.
	asm_bin=${asm_src##*/}
	asm_bin="${asm_bin%.*}.o"

	# Try to assemble the file, discarding all compiler output.
	if ${cc} ${cflags} -c ${asm_src} -o ${asm_bin} > /dev/null 2>&1; then
		rval='yes'
	else
		rval='no'
	fi

	# Remove the object file.
	rm -f "${asm_bin}"

	# Return the result.
	echo "${rval}"
}
|
|
|
|
set_default_version()
{
	# Compose the default version string and store it in the global
	# ${version}.
	#
	# Arguments: $1 - path to the version file.
	# Globals written: version_file, version.
	local file_contents

	# Remember the path to the version file.
	version_file=$1

	echo "${script_name}: determining default version string."

	# The string is the version file's contents wrapped in an "AOCL-BLIS"
	# prefix and a "Build <YYYYMMDD>" suffix carrying today's date.
	file_contents=$(cat "${version_file}")
	version="AOCL-BLIS ${file_contents} Build $(date +%Y%m%d)"
}
|
|
|
|
|
|
|
|
#
|
|
# -- main function -------------------------------------------------------------
|
|
#
|
|
|
|
main()
|
|
{
|
|
#declare -A config_registry
|
|
#declare -A kernel_registry
|
|
#declare -A kconfig_registry
|
|
|
|
# -- Basic names and paths --
|
|
|
|
# The name of the script, stripped of any preceding path.
|
|
script_name=${0##*/}
|
|
|
|
# The path to the script. We need this to find the top-level directory
|
|
# of the source distribution in the event that the user has chosen to
|
|
# build elsewhere.
|
|
dist_path=${0%/${script_name}}
|
|
|
|
# The path to the directory in which we are building. We do this to
|
|
# make explicit that we distinguish between the top-level directory
|
|
# of the distribution and the directory in which we are building.
|
|
cur_dirpath="."
|
|
|
|
# The file in which the version string is kept.
|
|
version_file="version"
|
|
version_filepath="${dist_path}/${version_file}"
|
|
|
|
# The name of and path to the directory named "build" in the top-level
|
|
# directory of the source distribution.
|
|
build_dir='build'
|
|
build_dirpath="${dist_path}/${build_dir}"
|
|
|
|
# The name/path to the registry (master list) of supported configurations.
|
|
registry_file="config_registry"
|
|
registry_filepath=${dist_path}/${registry_file}
|
|
|
|
# The names/paths for the template config.mk.in and its instantiated
|
|
# counterpart.
|
|
config_mk_in='config.mk.in'
|
|
config_mk_out='config.mk'
|
|
config_mk_in_path="${build_dirpath}/${config_mk_in}"
|
|
config_mk_out_path="${cur_dirpath}/${config_mk_out}"
|
|
|
|
# The names/paths for the template bli_config.h.in and its instantiated
|
|
# counterpart.
|
|
bli_config_h_in='bli_config.h.in'
|
|
bli_config_h_out='bli_config.h'
|
|
bli_config_h_in_path="${build_dirpath}/${bli_config_h_in}"
|
|
bli_config_h_out_path="${cur_dirpath}/${bli_config_h_out}"
|
|
|
|
# The names/paths for the template bli_addon.h.in and its instantiated
|
|
# counterpart.
|
|
bli_addon_h_in='bli_addon.h.in'
|
|
bli_addon_h_out='bli_addon.h'
|
|
bli_addon_h_in_path="${build_dirpath}/${bli_addon_h_in}"
|
|
bli_addon_h_out_path="${cur_dirpath}/${bli_addon_h_out}"
|
|
|
|
# Path to 'mirror-tree.sh' script.
|
|
mirror_tree_sh="${build_dirpath}/mirror-tree.sh"
|
|
|
|
# Path to 'gen-make-frags.sh' script and directory.
|
|
gen_make_frags_dirpath="${build_dirpath}/gen-make-frags"
|
|
gen_make_frags_sh="${gen_make_frags_dirpath}/gen-make-frag.sh"
|
|
|
|
# The name of the (top-level) configuration directory.
|
|
config_dir='config'
|
|
config_dirpath="${dist_path}/${config_dir}"
|
|
|
|
# The name of the (top-level) kernels directory.
|
|
kernels_dir='kernels'
|
|
kernels_dirpath="${dist_path}/${kernels_dir}"
|
|
|
|
# The name of the (top-level) reference kernels directory.
|
|
refkern_dir='ref_kernels'
|
|
refkern_dirpath="${dist_path}/${refkern_dir}"
|
|
|
|
# The root directory of the BLIS framework.
|
|
frame_dir='frame'
|
|
frame_dirpath="${dist_path}/${frame_dir}"
|
|
|
|
# The name of the (top-level) aocl_dtl directory.
|
|
aocldtl_dir='aocl_dtl'
|
|
aocldtl_dirpath="${dist_path}/${aocldtl_dir}"
|
|
# The names of the addons.
|
|
addon_dir='addon'
|
|
addon_dirpath="${dist_path}/${addon_dir}"
|
|
|
|
# The name of the sandbox directory.
|
|
sandbox_dir='sandbox'
|
|
sandbox_dirpath="${dist_path}/${sandbox_dir}"
|
|
|
|
# The name of the directory in which object files will be kept.
|
|
obj_dir='obj'
|
|
obj_dirpath="${cur_dirpath}/${obj_dir}"
|
|
|
|
# The name of the directory in which libraries will be kept.
|
|
lib_dir='lib'
|
|
lib_dirpath="${cur_dirpath}/${lib_dir}"
|
|
|
|
# The name of the directory in which headers will be kept.
|
|
include_dir='include'
|
|
include_dirpath="${cur_dirpath}/${include_dir}"
|
|
|
|
# The name of the directory in which the BLAS test suite is kept.
|
|
blastest_dir='blastest'
|
|
|
|
# The name of the directory in which the BLIS test suite is kept.
|
|
testsuite_dir='testsuite'
|
|
|
|
# -- Version-related --
|
|
|
|
# The shared library (.so) version file.
|
|
so_version_file='so_version'
|
|
so_version_filepath="${dist_path}/${so_version_file}"
|
|
|
|
# The major and minor/build .so version numbers.
|
|
so_version_major=''
|
|
so_version_minorbuild=''
|
|
|
|
# -- configure options --
|
|
|
|
# Define the default prefix so that the print_usage() function can
|
|
# output it in the --help text.
|
|
prefix_def='/usr/local'
|
|
|
|
# The installation prefix, assigned its default value, and a flag to
|
|
# track whether or not it was given by the user.
|
|
prefix=${prefix_def}
|
|
prefix_flag=''
|
|
|
|
# The installation exec_prefix, assigned its default value, and a flag to
|
|
# track whether or not it was given by the user.
|
|
exec_prefix='${prefix}'
|
|
exec_prefix_flag=''
|
|
|
|
# The installation libdir, assigned its default value, and a flag to
|
|
# track whether or not it was given by the user.
|
|
libdir='${exec_prefix}/lib'
|
|
libdir_flag=''
|
|
|
|
# The installation includedir, assigned its default value, and a flag to
|
|
# track whether or not it was given by the user.
|
|
includedir='${prefix}/include'
|
|
includedir_flag=''
|
|
|
|
# The installation sharedir, assigned its default value, and a flag to
|
|
# track whether or not it was given by the user.
|
|
sharedir='${prefix}/share'
|
|
sharedir_flag=''
|
|
|
|
# The preset value of CFLAGS and LDFLAGS (ie: compiler and linker flags
|
|
# to use in addition to those determined by the build system).
|
|
cflags_preset=''
|
|
ldflags_preset=''
|
|
|
|
# The user-given debug type and a flag indicating it was given.
|
|
debug_type=''
|
|
debug_flag=''
|
|
|
|
# The system flag.
|
|
enable_system='yes'
|
|
|
|
# The threading flag.
|
|
threading_model='off'
|
|
|
|
# The method of assigning micropanels to threads in the JR and IR loops.
|
|
thread_part_jrir='slab'
|
|
|
|
# Option variables.
|
|
quiet_flag=''
|
|
show_config_list=''
|
|
|
|
# Additional flags.
|
|
enable_verbose='no'
|
|
enable_arg_max_hack='no'
|
|
enable_static='yes'
|
|
enable_shared='yes'
|
|
export_shared='public'
|
|
enable_pba_pools='yes'
|
|
enable_sba_pools='yes'
|
|
enable_mem_tracing='no'
|
|
int_type_size=0
|
|
blas_int_type_size=32
|
|
enable_blas='yes'
|
|
enable_cblas='no'
|
|
enable_mixed_dt='yes'
|
|
enable_mixed_dt_extra_mem='yes'
|
|
enable_sup_handling='yes'
|
|
enable_memkind='' # The default memkind value is determined later on.
|
|
enable_trsm_preinversion='yes'
|
|
enable_aocl_dynamic='yes'
|
|
force_version='no'
|
|
complex_return='default'
|
|
disable_blis_arch_type='no'
|
|
rename_blis_arch_type='BLIS_ARCH_TYPE'
|
|
|
|
# The addon flag and names.
|
|
addon_flag=''
|
|
addon_list=''
|
|
|
|
# The sandbox flag and name.
|
|
sandbox_flag=''
|
|
sandbox=''
|
|
|
|
# -- Configuration registry --
|
|
|
|
# The name of the chosen configuration (the configuration "family").
|
|
config_name=''
|
|
|
|
# The list of sub-configurations associated with config_name.
|
|
config_list=''
|
|
|
|
# The list of kernel sets that will be needed by the sub-configurations
|
|
# in config_list.
|
|
kernel_list=''
|
|
|
|
# The list of kernel:sub-configuration pairs for all kernels contained
|
|
# in kernel_list.
|
|
kconfig_map=''
|
|
|
|
# -- Out-of-tree --
|
|
|
|
# Whether we are building out-of-tree.
|
|
configured_oot="no"
|
|
|
|
# Dummy file. Used to check whether the cwd is the same as the top-level
|
|
# source distribution directory.
|
|
dummy_file='_blis_dir_detect.tmp'
|
|
|
|
# -- Debugging --
|
|
|
|
# A global flag to help debug the compilation command for the executable
|
|
# that configure builds on-the-fly to perform hardware auto-detection.
|
|
debug_auto_detect="no"
|
|
|
|
|
|
|
|
# -- Command line option/argument parsing ----------------------------------
|
|
|
|
found=true
|
|
while $found = true; do
|
|
|
|
# Process our command line options.
|
|
unset OPTIND
|
|
while getopts ":hp:d:e:a:s:t:r:qci:b:-:" opt; do
|
|
case $opt in
|
|
-)
|
|
case "$OPTARG" in
|
|
help)
|
|
print_usage
|
|
;;
|
|
quiet)
|
|
quiet_flag=1
|
|
;;
|
|
prefix=*)
|
|
prefix_flag=1
|
|
prefix=${OPTARG#*=}
|
|
;;
|
|
exec-prefix=*)
|
|
exec_prefix_flag=1
|
|
exec_prefix=${OPTARG#*=}
|
|
;;
|
|
libdir=*)
|
|
libdir_flag=1
|
|
libdir=${OPTARG#*=}
|
|
;;
|
|
includedir=*)
|
|
includedir_flag=1
|
|
includedir=${OPTARG#*=}
|
|
;;
|
|
sharedir=*)
|
|
sharedir_flag=1
|
|
sharedir=${OPTARG#*=}
|
|
;;
|
|
enable-debug)
|
|
debug_flag=1
|
|
debug_type=noopt
|
|
;;
|
|
enable-debug=*)
|
|
debug_flag=1
|
|
debug_type=${OPTARG#*=}
|
|
;;
|
|
disable-debug)
|
|
debug_flag=0
|
|
;;
|
|
enable-verbose-make)
|
|
enable_verbose='yes'
|
|
;;
|
|
disable-verbose-make)
|
|
enable_verbose='no'
|
|
;;
|
|
enable-arg-max-hack)
|
|
enable_arg_max_hack='yes'
|
|
;;
|
|
disable-arg-max-hack)
|
|
enable_arg_max_hack='no'
|
|
;;
|
|
enable-static)
|
|
enable_static='yes'
|
|
;;
|
|
disable-static)
|
|
enable_static='no'
|
|
;;
|
|
enable-shared)
|
|
enable_shared='yes'
|
|
;;
|
|
disable-shared)
|
|
enable_shared='no'
|
|
;;
|
|
export-shared=*)
|
|
export_shared=${OPTARG#*=}
|
|
;;
|
|
enable-system)
|
|
enable_system='yes'
|
|
;;
|
|
disable-system)
|
|
enable_system='no'
|
|
;;
|
|
enable-threading=*)
|
|
threading_model=${OPTARG#*=}
|
|
;;
|
|
disable-threading)
|
|
threading_model='off'
|
|
;;
|
|
thread-part-jrir=*)
|
|
thread_part_jrir=${OPTARG#*=}
|
|
;;
|
|
enable-pba-pools)
|
|
enable_pba_pools='yes'
|
|
;;
|
|
disable-pba-pools)
|
|
enable_pba_pools='no'
|
|
;;
|
|
enable-sba-pools)
|
|
enable_sba_pools='yes'
|
|
;;
|
|
disable-sba-pools)
|
|
enable_sba_pools='no'
|
|
;;
|
|
enable-mem-tracing)
|
|
enable_mem_tracing='yes'
|
|
;;
|
|
disable-mem-tracing)
|
|
enable_mem_tracing='no'
|
|
;;
|
|
enable-addon=*)
|
|
addon_flag=1
|
|
addon_name=${OPTARG#*=}
|
|
# Append the addon name to the list.
|
|
addon_list="${addon_list} ${addon_name}"
|
|
;;
|
|
disable-addon)
|
|
addon_flag=''
|
|
;;
|
|
enable-sandbox=*)
|
|
sandbox_flag=1
|
|
sandbox=${OPTARG#*=}
|
|
;;
|
|
disable-sandbox)
|
|
sandbox_flag=''
|
|
;;
|
|
int-size=*)
|
|
int_type_size=${OPTARG#*=}
|
|
;;
|
|
blas-int-size=*)
|
|
blas_int_type_size=${OPTARG#*=}
|
|
;;
|
|
enable-blas)
|
|
enable_blas='yes'
|
|
;;
|
|
disable-blas)
|
|
enable_blas='no'
|
|
;;
|
|
enable-cblas)
|
|
enable_cblas='yes'
|
|
;;
|
|
disable-cblas)
|
|
enable_cblas='no'
|
|
;;
|
|
enable-mixed-dt)
|
|
enable_mixed_dt='yes'
|
|
;;
|
|
disable-mixed-dt)
|
|
enable_mixed_dt='no'
|
|
;;
|
|
enable-mixed-dt-extra-mem)
|
|
enable_mixed_dt_extra_mem='yes'
|
|
;;
|
|
disable-mixed-dt-extra-mem)
|
|
enable_mixed_dt_extra_mem='no'
|
|
;;
|
|
enable-sup-handling)
|
|
enable_sup_handling='yes'
|
|
;;
|
|
disable-sup-handling)
|
|
enable_sup_handling='no'
|
|
;;
|
|
with-memkind)
|
|
enable_memkind='yes'
|
|
;;
|
|
without-memkind)
|
|
enable_memkind='no'
|
|
;;
|
|
enable-trsm-preinversion)
|
|
enable_trsm_preinversion='yes'
|
|
;;
|
|
disable-trsm-preinversion)
|
|
enable_trsm_preinversion='no'
|
|
;;
|
|
enable-aocl-dynamic)
|
|
enable_aocl_dynamic='yes'
|
|
;;
|
|
disable-aocl-dynamic)
|
|
enable_aocl_dynamic='no'
|
|
;;
|
|
force-version=*)
|
|
force_version=${OPTARG#*=}
|
|
;;
|
|
show-config-list)
|
|
show_config_list=1
|
|
;;
|
|
complex-return=*)
|
|
complex_return=${OPTARG#*=}
|
|
;;
|
|
enable-blis-arch-type)
|
|
disable_blis_arch_type='no'
|
|
;;
|
|
disable-blis-arch-type)
|
|
disable_blis_arch_type='yes'
|
|
;;
|
|
rename-blis-arch-type=*)
|
|
rename_blis_arch_type=${OPTARG#*=}
|
|
;;
|
|
*)
|
|
print_usage
|
|
;;
|
|
esac;;
|
|
h)
|
|
print_usage
|
|
;;
|
|
p)
|
|
prefix_flag=1
|
|
prefix=$OPTARG
|
|
;;
|
|
d)
|
|
debug_flag=1
|
|
debug_type=$OPTARG
|
|
;;
|
|
e)
|
|
export_shared=$OPTARG
|
|
;;
|
|
a)
|
|
addon_flag=1
|
|
addon_name=$OPTARG
|
|
# Append the addon name to the list.
|
|
addon_list="${addon_list} ${addon_name}"
|
|
;;
|
|
s)
|
|
sandbox_flag=1
|
|
sandbox=$OPTARG
|
|
;;
|
|
q)
|
|
quiet_flag=1
|
|
;;
|
|
t)
|
|
threading_model=$OPTARG
|
|
;;
|
|
r)
|
|
thread_part_jrir=$OPTARG
|
|
;;
|
|
i)
|
|
int_type_size=$OPTARG
|
|
;;
|
|
b)
|
|
blas_int_type_size=$OPTARG
|
|
;;
|
|
c)
|
|
show_config_list=1
|
|
;;
|
|
\?)
|
|
print_usage
|
|
;;
|
|
esac
|
|
done
|
|
shift $(($OPTIND - 1))
|
|
|
|
# Parse environment variables
|
|
found=false
|
|
while [ $# -gt 0 ]; do
|
|
case $1 in
|
|
*=*)
|
|
var=`expr "$1" : '\([^=]*\)='`
|
|
value=`expr "$1" : '[^=]*=\(.*\)'`
|
|
eval $var=\$value
|
|
export $var
|
|
shift
|
|
found=true
|
|
;;
|
|
*)
|
|
break
|
|
;;
|
|
esac
|
|
done
|
|
done
|
|
|
|
|
|
# -- Check the operating system --------------------------------------------
|
|
|
|
os_name=$(uname -s)
|
|
os_vers=$(uname -r)
|
|
echo "${script_name}: detected ${os_name} kernel version ${os_vers}."
|
|
|
|
# Define a single variable off of which we can branch to tell if we are
|
|
# building for Windows.
|
|
is_win=no
|
|
if [[ $os_name == MSYS* ]] || \
|
|
[[ $os_name == MINGW* ]] || \
|
|
[[ $os_name == CYGWIN* ]] ; then
|
|
is_win=yes
|
|
fi
|
|
|
|
|
|
# -- Find a python interpreter ---------------------------------------------
|
|
|
|
# Acquire the python search order. This may vary based on the os found
|
|
# above.
|
|
python_search_list=$(get_python_search_list)
|
|
|
|
echo "${script_name}: python interpeter search list is: ${python_search_list}."
|
|
|
|
# Find a working python interpreter.
|
|
found_python=$(select_tool "${python_search_list}" "${PYTHON}")
|
|
|
|
# If we didn't find any working python interpreters, we print an error
|
|
# message.
|
|
if [ -z "${found_python}" ]; then
|
|
echo "${script_name}: *** Could not find working python interperter! Cannot continue."
|
|
exit 1
|
|
fi
|
|
|
|
echo "${script_name}: using '${found_python}' python interpreter."
|
|
|
|
|
|
# -- Check the python version ----------------------------------------------
|
|
|
|
# Check the python interpreter's version.
|
|
get_python_version
|
|
check_python
|
|
|
|
|
|
# -- Find a C compiler -----------------------------------------------------
|
|
|
|
# Acquire the compiler search order. This will vary based on the os found
|
|
# above.
|
|
cc_search_list=$(get_cc_search_list)
|
|
|
|
echo "${script_name}: C compiler search list is: ${cc_search_list}."
|
|
|
|
# Find a working C compiler.
|
|
found_cc=$(select_tool "${cc_search_list}" "${CC}")
|
|
|
|
# If we didn't find any working C compilers, we print an error message.
|
|
if [ -z "${found_cc}" ]; then
|
|
echo "${script_name}: *** Could not find working C compiler! Cannot continue."
|
|
exit 1
|
|
fi
|
|
|
|
echo "${script_name}: using '${found_cc}' C compiler."
|
|
|
|
|
|
# -- Find a C++ compiler ---------------------------------------------------
|
|
|
|
# Acquire the compiler search order. This will vary based on the os
|
|
# found above.
|
|
cxx_search_list=$(get_cxx_search_list)
|
|
|
|
echo "${script_name}: C++ compiler search list is: ${cxx_search_list}."
|
|
|
|
# Find a working C++ compiler. NOTE: We can reuse the select_tool()
|
|
# function since it is written in a way that is general-purpose.
|
|
found_cxx=$(select_tool "${cxx_search_list}" "${CXX}")
|
|
|
|
# If we didn't find any working C++ compilers, we print an error message.
|
|
if [ -z "${found_cxx}" ]; then
|
|
echo "${script_name}: Could not find working C++ compiler! C++ will not be available in sandbox."
|
|
found_cxx="c++notfound"
|
|
fi
|
|
|
|
echo "${script_name}: using '${found_cxx}' C++ compiler (for sandbox only)."
|
|
|
|
|
|
# -- Check the compiler version --------------------------------------------
|
|
|
|
# Initialize the blacklist to empty.
|
|
blacklist_init
|
|
|
|
# Check the compiler's version. Certain versions of certain compilers
|
|
# will preclude building certain sub-configurations, which are added
|
|
# to a blacklist. We also make note of certain version ranges that
|
|
# will be useful to know about later.
|
|
get_compiler_version
|
|
check_compiler
|
|
check_compiler_version_ranges
|
|
|
|
# Now check the assembler's ability to assemble code. Older versions
|
|
# of binutils may not be aware of certain instruction sets. Those
|
|
# sub-configurations employing kernels that use such instruction sets
|
|
# will also be blacklisted.
|
|
get_binutils_version
|
|
check_assembler
|
|
|
|
# Remove duplicates and whitespace from the blacklist.
|
|
blacklist_cleanup
|
|
|
|
if [ -n "${config_blist}" ]; then
|
|
|
|
echo "${script_name}: configuration blacklist:"
|
|
echo "${script_name}: ${config_blist}"
|
|
fi
|
|
|
|
|
|
# -- Read the configuration registry ---------------------------------------
|
|
|
|
# Make sure the config registry file exists and can be opened.
|
|
if [ ! -f "${registry_filepath}" ]; then
|
|
|
|
echo "${script_name}: could not open '${registry_file}' file; cannot continue."
|
|
echo "${script_name}: BLIS distribution appears to be incomplete."
|
|
echo "${script_name}: *** Please verify source distribution."
|
|
|
|
exit 1
|
|
fi
|
|
|
|
# Read the registered configuration names and lists into associative
|
|
# arrays.
|
|
echo -n "${script_name}: reading configuration registry..."
|
|
read_registry_file ${registry_filepath}
|
|
echo "done."
|
|
|
|
# Report if additional configurations needed to be blacklisted.
|
|
# NOTE: This branch should never execute so long as indirect blacklisting
|
|
# is disabled. See comment regarding issue #214 in the definition of
|
|
# pass_config_kernel_registries().
|
|
if [ -n "${indirect_blist}" ]; then
|
|
echo "${script_name}: needed to indirectly blacklist additional configurations:"
|
|
echo "${script_name}: ${indirect_blist}"
|
|
fi
|
|
|
|
|
|
# -- Acquire the BLIS version ----------------------------------------------
|
|
|
|
# Set the 'version' variable to its default value: the contents of the
# 'version' file, prefixed with "AOCL-BLIS" and suffixed with the build
# date (see set_default_version()).
|
|
set_default_version "${version_filepath}"
|
|
|
|
# Initial message.
|
|
echo "${script_name}: starting configuration of BLIS ${version}."
|
|
|
|
# Check if the user requested a custom version string.
|
|
if [ "x${force_version}" = "xno" ]; then
|
|
echo "${script_name}: configuring with official version string."
|
|
else
|
|
echo "${script_name}: configuring with custom version string '${force_version}'."
|
|
version="${force_version}"
|
|
fi
|
|
|
|
|
|
# -- Acquire the shared library (.so) versions -----------------------------
|
|
|
|
# The first line of the 'so_version' file contains the .so major version.
|
|
so_version_major=$(cat ${so_version_filepath} | sed -n "1p")
|
|
|
|
# The second line contains the minor and build .so version numbers
|
|
# (separated by a '.').
|
|
so_version_minorbuild=$(cat ${so_version_filepath} | sed -n "2p")
|
|
|
|
echo "${script_name}: found shared library .so version '${so_version_major}.${so_version_minorbuild}'."
|
|
echo "${script_name}: .so major version: ${so_version_major}"
|
|
echo "${script_name}: .so minor.build version: ${so_version_minorbuild}"
|
|
|
|
|
|
# -- Various pre-configuration checks --------------------------------------
|
|
|
|
# Set config_name based on the number of arguments leftover (after command
|
|
# line option processing).
|
|
if [ $# = "0" ]; then
|
|
|
|
#configs_avail="auto "$(ls ${config_dirpath})
|
|
|
|
echo "${script_name}: "
|
|
echo "${script_name}: *** No configuration given! ***"
|
|
echo "${script_name}: "
|
|
echo "${script_name}: Default configuration behavior is not implemented (for your"
|
|
echo "${script_name}: own safety). Please re-run '${script_name}' and specify one"
|
|
echo "${script_name}: of the existing configurations in the source distribution's"
|
|
echo "${script_name} '${registry_file}' file:"
|
|
echo "${script_name}: "
|
|
#for k in "${!config_registry[@]}"; do
|
|
for cr_var in ${!config_registry_*}; do
|
|
|
|
#v=${config_registry[$k]}
|
|
k=${cr_var##config_registry_}; v=${!cr_var}
|
|
|
|
echo "${script_name}: $k (${v})"
|
|
done
|
|
echo "${script_name}: "
|
|
|
|
exit 1
|
|
|
|
elif [ $# != "1" ]; then # more than one configuration argument given.
|
|
|
|
print_usage
|
|
|
|
fi
|
|
|
|
if [ $1 = "auto" ]; then
|
|
|
|
echo "${script_name}: automatic configuration requested."
|
|
|
|
# Call the auto_detect() function and save the returned string in
|
|
# config_name.
|
|
config_name=$(auto_detect)
|
|
#config_name="generic"
|
|
|
|
# Debugging stuff. When confirming the behavior of auto_detect(),
|
|
# it is useful to output ${config_name}, which in theory could be
|
|
# set temporarily to something other than the config_name, such as
|
|
# the compilation command.
|
|
if [ "${debug_auto_detect}" = "yes" ]; then
|
|
echo "auto-detect program compilation command: ${config_name}"
|
|
exit 1
|
|
fi
|
|
|
|
echo "${script_name}: hardware detection driver returned '${config_name}'."
|
|
|
|
# If the auto-detect code returned the "generic" string, it means we
|
|
# were unable to automatically detect the user's hardware type. While
|
|
# this is going to be a rare event, it will likely lead the user to
|
|
# experience much lower performance than expected, and thus we will
|
|
# warn them about it at the end of the configure output (to increase
|
|
# the chances that they see it).
|
|
if [ "${config_name}" = "generic" ]; then
|
|
|
|
warn_user_generic=1
|
|
else
|
|
warn_user_generic=0
|
|
fi
|
|
else
|
|
|
|
# Use the command line argument as the configuration name.
|
|
config_name=$1
|
|
|
|
echo "${script_name}: manual configuration requested; configuring with '${config_name}'."
|
|
|
|
fi
|
|
|
|
# Use the selected config name to look up the list of configurations
|
|
# and kernels associated with that name.
|
|
#config_list=${config_registry[${config_name}]}
|
|
#kernel_list=${kernel_registry[${config_name}]}
|
|
config_list=$(query_array "config_registry" ${config_name})
|
|
kernel_list=$(query_array "kernel_registry" ${config_name})
|
|
|
|
# Use the config_registry and kernel_registry to build a kconfig_registry
|
|
# for the selected config_name.
|
|
build_kconfig_registry "${config_name}"
|
|
|
|
# Print the configuration list and kernel list, if requested.
|
|
if [ "${show_config_list}" == "1" ]; then
|
|
|
|
echo "${script_name}: configuration list:"
|
|
#for k in "${!config_registry[@]}"; do
|
|
for cr_var in ${!config_registry_*}; do
|
|
|
|
#v=${config_registry[$k]}
|
|
k=${cr_var##config_registry_}; v=${!cr_var}
|
|
|
|
echo "${script_name}: $k: ${v}"
|
|
done
|
|
|
|
echo "${script_name}: kernel list:"
|
|
#for k in "${!kernel_registry[@]}"; do
|
|
for kr_var in ${!kernel_registry_*}; do
|
|
|
|
#v=${kernel_registry[$k]}
|
|
k=${kr_var##kernel_registry_}; v=${!kr_var}
|
|
|
|
echo "${script_name}: $k: ${v}"
|
|
done
|
|
|
|
echo "${script_name}: kernel-to-config map for '${config_name}':"
|
|
#for k in "${!kconfig_registry[@]}"; do
|
|
for kc_var in ${!kconfig_registry_*}; do
|
|
|
|
#v=${kconfig_registry[$k]}
|
|
k=${kc_var##kconfig_registry_}; v=${!kc_var}
|
|
|
|
echo "${script_name}: $k: ${v}"
|
|
done
|
|
fi
|
|
|
|
# For each kernel in the kernel list, reduce the list of associated
|
|
# sub-configurations (in the kconfig_registry) to a singleton using
|
|
# the following rules:
|
|
# 1. If the list is a singleton, use that name.
|
|
# 2. If the list contains a sub-configuration name that matches the
|
|
# kernel name, use that name.
|
|
# 3. Otherwise, use the last name in the list.
|
|
# We use the chosen singleton to create a "kernel:subconfig" pair, which
|
|
# we accumulate into a list. This list is the kernel-to-config map, or
|
|
# kconfig_map.
|
|
|
|
# We use a sorted version of kernel_list so that it ends up matching the
|
|
# display order of the kconfig_registry above.
|
|
kernel_list_sort=$(echo ${kernel_list} | xargs -n1 | sort -u)
|
|
|
|
kconfig_map=""
|
|
for kernel in ${kernel_list_sort}; do
|
|
|
|
#configs="${kconfig_registry[$kernel]}"
|
|
configs=$(query_array "kconfig_registry" ${kernel})
|
|
|
|
has_one_kernel=$(is_singleton "${configs}")
|
|
contains_kernel=$(is_in_list "${kernel}" "${configs}")
|
|
|
|
# Check if the list is a singleton.
|
|
if [ "${has_one_kernel}" == "true" ]; then
|
|
|
|
reducedclist="${configs}"
|
|
|
|
# Check if the list contains a sub-config name that matches the kernel.
|
|
elif [ "${contains_kernel}" == "true" ]; then
|
|
|
|
reducedclist="${kernel}"
|
|
|
|
# Otherwise, use the last name.
|
|
else
|
|
|
|
last_config=${configs##* }
|
|
reducedclist="${last_config}"
|
|
fi
|
|
|
|
# Create a new "kernel:subconfig" pair and add it to the kconfig_map
|
|
# list, removing whitespace.
|
|
new_pair="${kernel}:${reducedclist}"
|
|
kconfig_map=$(canonicalize_ws "${kconfig_map} ${new_pair}")
|
|
done
|
|
|
|
if [ "${show_config_list}" == "1" ]; then
|
|
|
|
echo "${script_name}: kernel-to-config map for '${config_name}' (chosen pairs):"
|
|
for k in ${kconfig_map}; do
|
|
echo "${script_name}: $k"
|
|
done
|
|
fi
|
|
|
|
|
|
echo "${script_name}: checking configuration against contents of '${registry_file}'."
|
|
|
|
# First, ensure that the config name is registered (ie: it is present
|
|
# in the config_registry file).
|
|
if [ -z "${config_list}" ]; then
|
|
|
|
# NOTE: This branch should never execute when using auto-detection,
|
|
# but we have it here just in case.
|
|
if [ $1 = "auto" ]; then
|
|
|
|
echo "${script_name}: 'auto-detected configuration '${config_name}' is NOT registered!"
|
|
echo "${script_name}: "
|
|
echo "${script_name}: *** Cannot continue with unregistered configuration '${config_name}'. ***"
|
|
echo "${script_name}: "
|
|
exit 1;
|
|
|
|
else
|
|
|
|
# At this point, we know: (a) config_list is empty; and (b) the user
|
|
# requested manual configuration. If the config_name given by the
|
|
# user is present in the configuration blacklist (config_blist),
|
|
# then we can deduce why the config_list is empty: because the only
|
|
# subconfig implied by config_name is blacklisted. Thus, we cannot
|
|
# proceed.
|
|
|
|
if [ $(is_in_list "${config_name}" "${config_blist}") == "true" ]; then
|
|
|
|
echo "${script_name}: 'user-specified configuration '${config_name}' is blacklisted!"
|
|
echo "${script_name}: "
|
|
echo "${script_name}: *** Cannot continue with blacklisted configuration '${config_name}'. ***"
|
|
echo "${script_name}: *** Try updating your compiler and/or assembler (binutils) versions. ***"
|
|
echo "${script_name}: "
|
|
exit 1;
|
|
else
|
|
|
|
# If config_name is NOT present in config_blist, then we know
|
|
# that config_list is empty simply because config_name is
|
|
# unregistered.
|
|
|
|
echo "${script_name}: 'user-specified configuration '${config_name}' is NOT registered!"
|
|
echo "${script_name}: "
|
|
echo "${script_name}: *** Cannot continue with unregistered configuration '${config_name}'. ***"
|
|
echo "${script_name}: "
|
|
exit 1;
|
|
fi
|
|
fi
|
|
else
|
|
|
|
# This branch executes when the configuration is found to be present
|
|
# (i.e. registered) in the config_registry file.
|
|
|
|
echo "${script_name}: configuration '${config_name}' is registered."
|
|
echo "${script_name}: '${config_name}' is defined as having the following sub-configurations:"
|
|
echo "${script_name}: ${config_list}"
|
|
echo "${script_name}: which collectively require the following kernels:"
|
|
echo "${script_name}: ${kernel_list}"
|
|
|
|
fi
|
|
|
|
|
|
echo "${script_name}: checking sub-configurations:"

# Validate each sub-configuration named in config_list. A sub-configuration
# passes only if (1) the registry lookup returns a non-empty entry and
# (2) a directory of the same name exists under config_dirpath. The first
# failure aborts the script with exit status 1.
for conf in ${config_list}; do

  # (1) Registry lookup. An empty result is treated as "never registered".
  this_clist=$(query_array "config_registry" "${conf}")

  if [ -n "${this_clist}" ]; then
    # No trailing newline here; the directory check below finishes the line.
    echo -n "${script_name}: '${conf}' is registered."
  else
    echo "${script_name}: '${conf}' is NOT registered!"
    echo "${script_name}: "
    echo "${script_name}: *** Cannot continue with unregistered configuration '${conf}'. ***"
    echo "${script_name}: "
    exit 1
  fi

  # (2) The sub-configuration directory must be present on disk.
  if [ -d "${config_dirpath}/${conf}" ]; then
    echo "..and exists."
  else
    echo "..but does NOT exist!"
    echo "${script_name}: "
    echo "${script_name}: *** Cannot continue with nonexistent configuration '${conf}'. ***"
    echo "${script_name}: "
    exit 1
  fi
done
|
|
|
|
|
|
echo "${script_name}: checking sub-configurations' requisite kernels:"

# Each kernel set named in kernel_list must have a directory of the same
# name under kernels_dirpath; abort on the first one that is missing.
for kernel in ${kernel_list}; do

  # Start the status line; the branch below completes it.
  echo -n "${script_name}: '${kernel}' kernels..."

  if [ -d "${kernels_dirpath}/${kernel}" ]; then
    echo "exist."
  else
    echo "do NOT exist!"
    echo "${script_name}: "
    echo "${script_name}: *** Cannot continue with nonexistent kernel '${kernel}'. ***"
    echo "${script_name}: "
    exit 1
  fi
done
|
|
|
|
# Probe for libmemkind. The result is consulted later to pick the implied
# default for the --with[out]-memkind option: --with-memkind when the
# library is available, --without-memkind otherwise.
has_memkind="$(has_libmemkind)"

# Probe whether the chosen compiler accepts '#pragma omp simd'.
pragma_omp_simd="$(has_pragma_omp_simd)"
|
|
|
|
|
|
# -- Prepare variables for substitution into template files ----------------
|
|
|
|
# Report how each installation directory was determined. For every
# directory option, a non-empty *_flag variable means the option was given
# explicitly; otherwise the documented default is announced.

# --prefix
if [ -n "${prefix_flag}" ]; then
  echo "${script_name}: detected --prefix='${prefix}'."
else
  echo "${script_name}: no install prefix option given; defaulting to '${prefix}'."
fi

# --exec-prefix
if [ -n "${exec_prefix_flag}" ]; then
  echo "${script_name}: detected --exec-prefix='${exec_prefix}'."
else
  echo "${script_name}: no install exec_prefix option given; defaulting to PREFIX."
fi

# --libdir
if [ -n "${libdir_flag}" ]; then
  echo "${script_name}: detected --libdir='${libdir}'."
else
  echo "${script_name}: no install libdir option given; defaulting to EXECPREFIX/lib."
fi

# --includedir
if [ -n "${includedir_flag}" ]; then
  echo "${script_name}: detected --includedir='${includedir}'."
else
  echo "${script_name}: no install includedir option given; defaulting to PREFIX/include."
fi

# --sharedir
if [ -n "${sharedir_flag}" ]; then
  echo "${script_name}: detected --sharedir='${sharedir}'."
else
  echo "${script_name}: no install sharedir option given; defaulting to PREFIX/share."
fi

# Echo the installation directories that we settled on. The values are kept
# inside the double quotes so that paths containing whitespace or glob
# characters are printed verbatim instead of being word-split or expanded.
echo "${script_name}: final installation directories:"
echo "${script_name}: prefix: ${prefix}"
echo "${script_name}: exec_prefix: ${exec_prefix}"
echo "${script_name}: libdir: ${libdir}"
echo "${script_name}: includedir: ${includedir}"
echo "${script_name}: sharedir: ${sharedir}"
echo "${script_name}: NOTE: the variables above can be overridden when running make."
|
|
|
|
# Capture any CFLAGS supplied through the environment. An empty or unset
# CFLAGS yields an empty cflags_preset.
cflags_preset="${CFLAGS}"
if [ -z "${cflags_preset}" ]; then
  echo "${script_name}: no preset CFLAGS detected."
else
  echo "${script_name}: detected preset CFLAGS; prepending:"
  echo "${script_name}: ${cflags_preset}"
fi

# Same treatment for LDFLAGS.
ldflags_preset="${LDFLAGS}"
if [ -z "${ldflags_preset}" ]; then
  echo "${script_name}: no preset LDFLAGS detected."
else
  echo "${script_name}: detected preset LDFLAGS; prepending:"
  echo "${script_name}: ${ldflags_preset}"
fi
|
|
|
|
# Normalize debug_type. Without the debug flag it is forced to 'off'. With
# the flag, 'opt' and 'sde' are kept as given and every other value is
# mapped to 'noopt'.
if [ -z "${debug_flag}" ]; then
  debug_type='off'
  echo "${script_name}: debug symbols disabled."
else
  case "${debug_type}" in
    opt)
      echo "${script_name}: enabling debug symbols with optimizations."
      ;;
    sde)
      # debug_type already holds 'sde'; nothing to normalize.
      echo "${script_name}: enabling SDE processor emulation."
      ;;
    *)
      debug_type='noopt'
      echo "${script_name}: enabling debug symbols; optimizations disabled."
      ;;
  esac
fi
|
|
|
|
# Report whether verbose make output was requested.
case "${enable_verbose}" in
  yes)
    echo "${script_name}: enabling verbose make output. (disable with 'make V=0'.)"
    ;;
  *)
    echo "${script_name}: disabling verbose make output. (enable with 'make V=1'.)"
    ;;
esac

# Report whether the ARG_MAX hack was requested.
case "${enable_arg_max_hack}" in
  yes)
    echo "${script_name}: enabling ARG_MAX hack."
    ;;
  *)
    echo "${script_name}: disabling ARG_MAX hack."
    ;;
esac
|
|
|
|
# Decide which library flavours are built, keyed on the pair
# (enable_static, enable_shared). enable_shared_01 starts at 1 and is
# cleared only for the static-only combination. Any combination other than
# the three recognized ones aborts the script.
enable_shared_01=1

case "${enable_static}/${enable_shared}" in
  yes/yes)
    echo "${script_name}: building BLIS as both static and shared libraries."
    ;;
  yes/no)
    echo "${script_name}: building BLIS as a static library (shared library disabled)."
    enable_shared_01=0
    ;;
  no/yes)
    echo "${script_name}: building BLIS as a shared library (static library disabled)."
    ;;
  *)
    echo "${script_name}: Both static and shared libraries were disabled."
    echo "${script_name}: *** Please enable one (or both) to continue."
    exit 1
    ;;
esac
|
|
|
|
# Validate the --export-shared argument. Only 'all' and 'public' are
# accepted; anything else aborts. The feedback depends on whether a shared
# library is being built at all.
case "${export_shared}" in
  all)
    if [[ "${enable_shared}" == "yes" ]]; then
      echo "${script_name}: exporting all symbols within shared library."
    else
      echo "${script_name}: ignoring request to export all symbols within shared library."
    fi
    ;;
  public)
    # Nothing is printed for 'public' when no shared library is built.
    if [[ "${enable_shared}" == "yes" ]]; then
      echo "${script_name}: exporting only public symbols within shared library."
    fi
    ;;
  *)
    echo "${script_name}: *** Invalid argument '${export_shared}' to --export-shared option given."
    echo "${script_name}: *** Please use 'public' or 'all'."
    exit 1
    ;;
esac
|
|
|
|
# Translate enable_system into its 0/1 form. Any value other than 'yes'
# disables operating system support, which in turn forces threading off.
if [[ "${enable_system}" != "yes" ]]; then
  echo "${script_name}: disabling operating system support."
  echo "${script_name}: WARNING: all threading will be disabled!"
  enable_system_01=0

  # Override whatever threading model was requested.
  threading_model='off'
else
  echo "${script_name}: enabling operating system support."
  enable_system_01=1
fi
|
|
|
|
# Validate threading_model and rewrite aliases to their canonical spelling.
# Both threading backends start out disabled and are switched on only by
# the matching branch. 'omp' is a deprecated alias of 'openmp'. An
# unrecognized value aborts the script.
enable_openmp='no'
enable_openmp_01=0
enable_pthreads='no'
enable_pthreads_01=0

case "${threading_model}" in
  auto)
    echo "${script_name}: determining the threading model automatically."
    ;;
  openmp | omp)
    echo "${script_name}: using OpenMP for threading."
    enable_openmp='yes'
    enable_openmp_01=1
    threading_model="openmp" # Canonical value.
    ;;
  pthreads | pthread | posix)
    echo "${script_name}: using POSIX threads for threading."
    enable_pthreads='yes'
    enable_pthreads_01=1
    threading_model="pthreads" # Canonical value.
    ;;
  off | no | none)
    echo "${script_name}: threading is disabled."
    threading_model="off"
    ;;
  *)
    echo "${script_name}: *** Unsupported threading model: ${threading_model}."
    exit 1
    ;;
esac
|
|
|
|
# Check the method of assigning micropanels to threads in the JR and IR
# loops. Exactly one of the two 0/1 flags is set for a recognized value
# ('slab' or 'rr'); any other value aborts the script.
enable_jrir_slab_01=0
enable_jrir_rr_01=0
if [ "x${thread_part_jrir}" = "xslab" ]; then
  echo "${script_name}: requesting slab threading in jr and ir loops."
  enable_jrir_slab_01=1
elif [ "x${thread_part_jrir}" = "xrr" ]; then
  echo "${script_name}: requesting round-robin threading in jr and ir loops."
  enable_jrir_rr_01=1
else
  # Report the offending partitioning method itself (the message previously
  # printed the unrelated threading_model value).
  echo "${script_name}: *** Unsupported method of thread partitioning in jr and ir loops: ${thread_part_jrir}."
  exit 1
fi
|
|
|
|
# Convert the yes/no memory-related options to 0/1 values, echoing the
# outcome of each. Any value other than 'yes' counts as disabled.

# Memory pools for packing blocks.
case "${enable_pba_pools}" in
  yes)
    echo "${script_name}: internal memory pools for packing blocks are enabled."
    enable_pba_pools_01=1
    ;;
  *)
    echo "${script_name}: internal memory pools for packing blocks are disabled."
    enable_pba_pools_01=0
    ;;
esac

# Memory pools for small blocks.
case "${enable_sba_pools}" in
  yes)
    echo "${script_name}: internal memory pools for small blocks are enabled."
    enable_sba_pools_01=1
    ;;
  *)
    echo "${script_name}: internal memory pools for small blocks are disabled."
    enable_sba_pools_01=0
    ;;
esac

# Memory tracing output.
case "${enable_mem_tracing}" in
  yes)
    echo "${script_name}: memory tracing output is enabled."
    enable_mem_tracing_01=1
    ;;
  *)
    echo "${script_name}: memory tracing output is disabled."
    enable_mem_tracing_01=0
    ;;
esac
|
|
# Settle the final libmemkind setting from two inputs: whether the library
# was detected (has_memkind) and what, if anything, was requested
# explicitly (enable_memkind; empty means no explicit request).
if [[ "${has_memkind}" != "yes" ]]; then
  # Library unavailable: always disabled, even if explicitly requested.
  echo "${script_name}: libmemkind not found; disabling."
  if [[ "${enable_memkind}" == "yes" ]]; then
    echo "${script_name}: cannot honor explicit request to enable libmemkind."
  fi
  enable_memkind="no"
  enable_memkind_01=0
elif [[ -z "${enable_memkind}" ]]; then
  # Library available and no explicit request: default to enabling it.
  echo "${script_name}: libmemkind found; default is to enable use."
  enable_memkind="yes"
  enable_memkind_01=1
elif [[ "${enable_memkind}" == "yes" ]]; then
  echo "${script_name}: received explicit request to enable libmemkind."
  enable_memkind="yes"
  enable_memkind_01=1
else
  echo "${script_name}: received explicit request to disable libmemkind."
  enable_memkind="no"
  enable_memkind_01=0
fi
|
|
# Convert the '#pragma omp simd' probe result to a 0/1 value.
case "${pragma_omp_simd}" in
  yes)
    echo "${script_name}: compiler appears to support #pragma omp simd."
    enable_pragma_omp_simd_01=1
    ;;
  *)
    echo "${script_name}: compiler appears to not support #pragma omp simd."
    enable_pragma_omp_simd_01=0
    ;;
esac
|
|
# Convert the BLAS compatibility layer option to a 0/1 value.
if [ "x${enable_blas}" = "xyes" ]; then
  echo "${script_name}: the BLAS compatibility layer is enabled."
  enable_blas_01=1
else
  echo "${script_name}: the BLAS compatibility layer is disabled."
  enable_blas_01=0
fi

# Convert the CBLAS compatibility layer option to a 0/1 value.
if [ "x${enable_cblas}" = "xyes" ]; then
  echo "${script_name}: the CBLAS compatibility layer is enabled."
  enable_cblas_01=1
  # Force BLAS layer when CBLAS is enabled. Both the yes/no form and the
  # 0/1 form are updated so that the two stay in agreement.
  enable_blas='yes'
  enable_blas_01=1
else
  echo "${script_name}: the CBLAS compatibility layer is disabled."
  enable_cblas_01=0
fi
|
|
# Mixed datatype support. Both 0/1 values default to 0; the extra-memory
# optimizations are considered (and reported) only when mixed datatype
# support itself is enabled.
enable_mixed_dt_01=0
enable_mixed_dt_extra_mem_01=0
if [[ "${enable_mixed_dt}" == "yes" ]]; then
  echo "${script_name}: mixed datatype support is enabled."
  enable_mixed_dt_01=1

  if [[ "${enable_mixed_dt_extra_mem}" == "yes" ]]; then
    echo "${script_name}: mixed datatype optimizations requiring extra memory are enabled."
    enable_mixed_dt_extra_mem_01=1
  else
    echo "${script_name}: mixed datatype optimizations requiring extra memory are disabled."
  fi
else
  echo "${script_name}: mixed datatype support is disabled."
fi

# Small matrix handling.
case "${enable_sup_handling}" in
  yes)
    echo "${script_name}: small matrix handling is enabled."
    enable_sup_handling_01=1
    ;;
  *)
    echo "${script_name}: small matrix handling is disabled."
    enable_sup_handling_01=0
    ;;
esac

# Pre-inversion of trsm diagonal elements.
case "${enable_trsm_preinversion}" in
  yes)
    echo "${script_name}: trsm diagonal element pre-inversion is enabled."
    enable_trsm_preinversion_01=1
    ;;
  *)
    echo "${script_name}: trsm diagonal element pre-inversion is disabled."
    enable_trsm_preinversion_01=0
    ;;
esac
|
|
|
|
# AOCL dynamic threading is honoured only when it was requested AND
# multithreading is in use. If threading is off, the request is overridden
# and enable_aocl_dynamic is rewritten to "no".
if [[ "${enable_aocl_dynamic}" != "yes" ]]; then
  echo "${script_name}: dynamic selection of number of threads is disabled"
  enable_aocl_dynamic_01=0
elif [[ "${threading_model}" == "off" ]]; then
  enable_aocl_dynamic_01=0
  enable_aocl_dynamic="no"
  echo "${script_name}: dynamic threading is disabled as multithreading is disabled"
else
  echo "${script_name}: dynamic selection of number of threads is enabled"
  enable_aocl_dynamic_01=1
fi
|
|
|
|
# Report the integer size used by the BLIS API. Values other than 32 and
# 64 are reported as automatically determined.
case "${int_type_size}" in
  32)
    echo "${script_name}: the BLIS API integer size is 32-bit."
    ;;
  64)
    echo "${script_name}: the BLIS API integer size is 64-bit."
    ;;
  *)
    echo "${script_name}: the BLIS API integer size is automatically determined."
    ;;
esac

# Report the integer size used by the BLAS/CBLAS API.
case "${blas_int_type_size}" in
  32)
    echo "${script_name}: the BLAS/CBLAS API integer size is 32-bit."
    ;;
  64)
    echo "${script_name}: the BLAS/CBLAS API integer size is 64-bit."
    ;;
  *)
    echo "${script_name}: the BLAS/CBLAS API integer size is automatically determined."
    ;;
esac

# Refuse the one combination that could truncate values: 64-bit integers
# in the BLAS API on top of 32-bit integers in BLIS.
if [[ "${blas_int_type_size}" == "64" && "${int_type_size}" == "32" ]]; then
  echo "${script_name}: *** To avoid the possibility of truncation, we do not allow use of 64-bit integers in the BLAS API with 32-bit integers in BLIS. Please use a different configuration of integers."
  exit 1
fi
|
|
|
|
# Handle addons. With no addon flag there is nothing to verify; otherwise
# the addon list is de-duplicated and every addon must have a directory
# under addon_dirpath, or the script aborts.
if [ -z "${addon_flag}" ]; then
  echo "${script_name}: configuring with no addons."

  enable_addons_01=0
else
  # Drop repeated entries from the list.
  addon_list=$(rm_duplicate_words_simple "${addon_list}")

  echo "${script_name}: configuring with addons:"

  for addon in ${addon_list}; do

    echo "${script_name}: ${addon_dir}/${addon}"

    addon_fullpath="${addon_dirpath}/${addon}"

    if [ -d "${addon_fullpath}" ]; then
      continue
    fi

    echo "${script_name}: requested addon sub-directory does not exist! Cannot continue."
    echo "${script_name}: *** Please verify addon existence and name."
    exit 1
  done

  enable_addons_01=1
fi
|
|
|
|
# Handle the sandbox option. Without a sandbox flag the conventional gemm
# implementation is used; otherwise the requested sandbox must exist as a
# directory under sandbox_dirpath, or the script aborts.
if [ -z "${sandbox_flag}" ]; then
  echo "${script_name}: configuring for conventional gemm implementation."

  enable_sandbox_01=0
else
  echo "${script_name}: configuring for alternate gemm implementation:"
  echo "${script_name}: ${sandbox_dir}/${sandbox}"

  sandbox_fullpath="${sandbox_dirpath}/${sandbox}"

  if [ ! -d "${sandbox_fullpath}" ]; then
    echo "${script_name}: requested sandbox sub-directory does not exist! Cannot continue."
    echo "${script_name}: *** Please verify sandbox existence and name."
    exit 1
  fi

  enable_sandbox_01=1
fi
|
|
|
|
# Check the method used for returning complex numbers. When the value is
# 'default', it is derived from the Fortran compiler named by FC (if any);
# the fallback in every undetermined case is 'gnu'.
if [ "x${complex_return}" = "xdefault" ]; then
  if [ -n "${FC}" ]; then
    # Determine the complex return type from the given Fortran compiler.

    # Query the full vendor version string output. This includes the
    # version number along with (potentially) a bunch of other textual
    # clutter.
    # NOTE: This maybe should use merged stdout/stderr rather than only
    # stdout. But it works for now.
    # FC is deliberately left unquoted so that it may carry arguments.
    vendor_string="$(${FC} --version 2>/dev/null)"

    # Query the compiler "vendor" (ie: the compiler's simple name).
    # 'grep -E' is used in place of the obsolescent 'egrep', which newer
    # GNU grep releases flag with a warning on stderr. The trailing
    # { read ...; echo ...; } keeps only the first word of the first match
    # line, in case more than one match is printed.
    fc_vendor=$(echo "${vendor_string}" | grep -E -o 'ifort|GNU' | { read -r first rest; echo "${first}"; })

    if [ "x${fc_vendor}" = "xifort" ]; then
      complex_return='intel'
    elif [ "x${fc_vendor}" = "xGNU" ]; then
      complex_return='gnu'
    else
      echo "${script_name}: unable to determine Fortran compiler vendor!"
      complex_return='gnu'
    fi
  else
    complex_return='gnu'
  fi
fi

# Translate the (possibly just-resolved) value into the 0/1 flag used by
# the bli_config.h template; anything other than 'gnu' or 'intel' is fatal.
if [ "x${complex_return}" = "xgnu" ]; then
  complex_return_intel01='0'
elif [ "x${complex_return}" = "xintel" ]; then
  complex_return_intel01='1'
else
  echo "${script_name}: unknown complex return type \"${complex_return}\"! Cannot continue."
  echo "${script_name}: *** Acceptable values are \"gnu\" and \"intel\"."
  exit 1
fi
|
|
|
|
# Convert the --disable-blis-arch-type option to a 0/1 value. Feedback is
# printed only when the feature is being disabled.
case "${disable_blis_arch_type}" in
  yes)
    echo "${script_name}: user selection of code path using BLIS_ARCH_TYPE env var is disabled."
    disable_blis_arch_type_01='1'
    ;;
  *)
    disable_blis_arch_type_01='0'
    ;;
esac

# Mention a custom environment variable name only when it differs from the
# stock name BLIS_ARCH_TYPE.
if [[ "${rename_blis_arch_type}" != "BLIS_ARCH_TYPE" ]]; then
  echo "${script_name}: configuring with BLIS_ARCH_TYPE env var renamed to '${rename_blis_arch_type}'."
fi

echo "${script_name}: configuring complex return type as \"${complex_return}\"."
|
|
|
|
# The values below are later spliced into sed 's/.../.../' commands, where
# an unescaped forward slash would end the expression early. Produce a
# copy of each value with every '/' rewritten as '\/'. (The sed program
# uses '|' as its own delimiter purely so the slash needs no escaping in
# the pattern.)
os_name_esc=$(echo "${os_name}" | sed 's|/|\\/|g')
prefix_esc=$(echo "${prefix}" | sed 's|/|\\/|g')
exec_prefix_esc=$(echo "${exec_prefix}" | sed 's|/|\\/|g')
libdir_esc=$(echo "${libdir}" | sed 's|/|\\/|g')
includedir_esc=$(echo "${includedir}" | sed 's|/|\\/|g')
sharedir_esc=$(echo "${sharedir}" | sed 's|/|\\/|g')
dist_path_esc=$(echo "${dist_path}" | sed 's|/|\\/|g')
cc_esc=$(echo "${found_cc}" | sed 's|/|\\/|g')
cxx_esc=$(echo "${found_cxx}" | sed 's|/|\\/|g')
python_esc=$(echo "${found_python}" | sed 's|/|\\/|g')
# (sandbox_relpath is intentionally not escaped here; it is unused.)

# RANLIB and AR fall back to 'ranlib' and 'ar' when unset or empty.
ranlib_esc=$(echo "${RANLIB:-ranlib}" | sed 's|/|\\/|g')
ar_esc=$(echo "${AR:-ar}" | sed 's|/|\\/|g')
# LIBPTHREAD falls back to '-lpthread' only when it is unset; an explicitly
# empty value is kept as empty.
libpthread_esc=$(echo "${LIBPTHREAD--lpthread}" | sed 's|/|\\/|g')
cflags_preset_esc=$(echo "${cflags_preset}" | sed 's|/|\\/|g')
ldflags_preset_esc=$(echo "${ldflags_preset}" | sed 's|/|\\/|g')

# For Windows builds with clang, substitute no pthreads library into
# config.mk. (Windows builds employ an implementation of pthreads that is
# internal to BLIS.)
if [ "${is_win}" = "yes" ] && [ "${cc_vendor}" = "clang" ]; then
  libpthread_esc=
fi

# Likewise for systemless builds (--disable-system).
if [ "${enable_system}" = "no" ]; then
  libpthread_esc=
fi

# The version string normally has no slashes, but downstream maintainers
# (such as those for Debian) may create custom tags in their local clones
# such as "upstream/0.4.1". Escaping it here accommodates those cases.
version_esc=$(echo "${version}" | sed 's|/|\\/|g')
|
|
|
|
# Build the #define line for the configuration family: the upper-cased
# config_name appended to BLIS_FAMILY_. The trailing '\n' is stored as two
# literal characters here.
uconf=$(echo "${config_name}" | tr '[:lower:]' '[:upper:]')
config_name_define="#define BLIS_FAMILY_${uconf}\n"

# AOCL-specific flag, switched on when config_name contains 'zen' or
# 'amd64'. It allows TRSM to use cache block sizes that differ from the
# common level-3 block sizes.
# uconf is reused to hold the number of matching lines reported by grep.
uconf=$(echo "${config_name}" | grep -c 'zen\|amd64' | cut -d. -f1)
case "${uconf}" in
  1)
    enable_aocl_zen='yes'
    enable_aocl_zen_01=1
    ;;
  *)
    enable_aocl_zen='no'
    enable_aocl_zen_01=0
    ;;
esac
|
|
|
|
# One '#define BLIS_CONFIG_<NAME>' entry per sub-configuration in
# config_list, with the name upper-cased. Entries are terminated by a
# literal '\n' (two characters).
config_list_defines=""
for conf in ${config_list}; do
  uconf=$(echo "${conf}" | tr '[:lower:]' '[:upper:]')
  config_define="BLIS_CONFIG_${uconf}"
  config_list_defines+="#define ${config_define}\n"
done

# One '#define BLIS_KERNELS_<NAME>' entry per kernel set in kernel_list.
kernel_list_defines=""
for kern in ${kernel_list}; do
  uconf=$(echo "${kern}" | tr '[:lower:]' '[:upper:]')
  kernel_define="BLIS_KERNELS_${uconf}"
  kernel_list_defines+="#define ${kernel_define}\n"
done

# One '#include "<addon>.h"' entry per addon in addon_list.
addon_list_includes=""
for addon in ${addon_list}; do
  addon_header="\"${addon}.h\""
  addon_list_includes+="#include ${addon_header}\n"
done
|
|
|
|
|
|
# -- Determine whether we are performing an out-of-tree build --------------
|
|
|
|
if [ "${dist_path}" != "./" ]; then

  # The script was not invoked as "./configure", but it may still have been
  # invoked from the source directory via a full path or some equivalent
  # relative path. To tell the cases apart, drop a marker file into the
  # current directory and see whether it shows up under dist_path.
  touch "./${dummy_file}"

  if [ -f "${dist_path}/${dummy_file}" ]; then
    # The marker is visible through dist_path: same directory.
    configured_oot="no"
  else
    # The marker is not visible through dist_path: a different directory,
    # i.e. an out-of-tree build.
    configured_oot="yes"
  fi

  # Clean up the marker file.
  rm -f "./${dummy_file}"
fi
|
|
|
|
|
|
# -- Instantiate config.mk file from template ------------------------------
|
|
|
|
# Generate config_mk_out_path from the config_mk_in_path template by
# replacing each @placeholder@ with the value computed above. All
# substitutions are handed to a single sed invocation; sed applies the -e
# expressions to every line in the order listed.
echo "${script_name}: creating ${config_mk_out_path} from ${config_mk_in_path}"
cat "${config_mk_in_path}" \
  | sed \
    -e "s/@version@/${version_esc}/g" \
    -e "s/@so_version_major@/${so_version_major}/g" \
    -e "s/@so_version_minorbuild@/${so_version_minorbuild}/g" \
    -e "s/@config_name@/${config_name}/g" \
    -e "s/@config_list@/${config_list}/g" \
    -e "s/@kernel_list@/${kernel_list}/g" \
    -e "s/@kconfig_map@/${kconfig_map}/g" \
    -e "s/@os_name@/${os_name_esc}/g" \
    -e "s/@is_win@/${is_win}/g" \
    -e "s/@dist_path@/${dist_path_esc}/g" \
    -e "s/@CC_VENDOR@/${cc_vendor}/g" \
    -e "s/@gcc_older_than_4_9_0@/${gcc_older_than_4_9_0}/g" \
    -e "s/@gcc_older_than_6_1_0@/${gcc_older_than_6_1_0}/g" \
    -e "s/@gcc_older_than_9_1_0@/${gcc_older_than_9_1_0}/g" \
    -e "s/@CC@/${cc_esc}/g" \
    -e "s/@CXX@/${cxx_esc}/g" \
    -e "s/@RANLIB@/${ranlib_esc}/g" \
    -e "s/@AR@/${ar_esc}/g" \
    -e "s/@PYTHON@/${python_esc}/g" \
    -e "s/@libpthread@/${libpthread_esc}/g" \
    -e "s/@cflags_preset@/${cflags_preset_esc}/g" \
    -e "s/@ldflags_preset@/${ldflags_preset_esc}/g" \
    -e "s/@debug_type@/${debug_type}/g" \
    -e "s/@enable_system@/${enable_system}/g" \
    -e "s/@threading_model@/${threading_model}/g" \
    -e "s/@prefix@/${prefix_esc}/g" \
    -e "s/@exec_prefix@/${exec_prefix_esc}/g" \
    -e "s/@libdir@/${libdir_esc}/g" \
    -e "s/@includedir@/${includedir_esc}/g" \
    -e "s/@sharedir@/${sharedir_esc}/g" \
    -e "s/@enable_verbose@/${enable_verbose}/g" \
    -e "s/@configured_oot@/${configured_oot}/g" \
    -e "s/@enable_arg_max_hack@/${enable_arg_max_hack}/g" \
    -e "s/@enable_static@/${enable_static}/g" \
    -e "s/@enable_shared@/${enable_shared}/g" \
    -e "s/@export_shared@/${export_shared}/g" \
    -e "s/@enable_blas@/${enable_blas}/g" \
    -e "s/@enable_cblas@/${enable_cblas}/g" \
    -e "s/@enable_memkind@/${enable_memkind}/g" \
    -e "s/@pragma_omp_simd@/${pragma_omp_simd}/g" \
    -e "s/@addon_list@/${addon_list}/g" \
    -e "s/@sandbox@/${sandbox}/g" \
    -e "s/@enable_trsm_preinversion@/${enable_trsm_preinversion}/g" \
    -e "s/@enable_aocl_dynamic@/${enable_aocl_dynamic}/g" \
    -e "s/@complex_return@/${complex_return}/g" \
    -e "s/@blas_int_type_size@/${blas_int_type_size}/g" \
    -e "s/\@enable_aocl_zen\@/${enable_aocl_zen}/g" \
  > "${config_mk_out_path}"
|
|
|
|
# -- Instantiate bli_config.h file from template ---------------------------
|
|
|
|
# Generate bli_config_h_out_path from the bli_config_h_in_path template.
# The three multi-line #define lists are substituted with perl rather than
# sed, because the version of sed used on OS X is old and does not handle
# the '\n' sequences embedded in ${config_name_define},
# ${config_list_defines} and ${kernel_list_defines} intuitively. The
# remaining single-value placeholders go through one sed invocation, which
# applies its -e expressions in the order listed.
echo "${script_name}: creating ${bli_config_h_out_path} from ${bli_config_h_in_path}"
cat "${bli_config_h_in_path}" \
  | perl -pe "s/\@config_name_define\@/${config_name_define}/g" \
  | perl -pe "s/\@config_list_defines\@/${config_list_defines}/g" \
  | perl -pe "s/\@kernel_list_defines\@/${kernel_list_defines}/g" \
  | sed \
    -e "s/\@enable_aocl_zen\@/${enable_aocl_zen_01}/g" \
    -e "s/@enable_system@/${enable_system_01}/g" \
    -e "s/@enable_openmp@/${enable_openmp_01}/g" \
    -e "s/@enable_pthreads@/${enable_pthreads_01}/g" \
    -e "s/@enable_jrir_slab@/${enable_jrir_slab_01}/g" \
    -e "s/@enable_jrir_rr@/${enable_jrir_rr_01}/g" \
    -e "s/@enable_pba_pools@/${enable_pba_pools_01}/g" \
    -e "s/@enable_sba_pools@/${enable_sba_pools_01}/g" \
    -e "s/@enable_mem_tracing@/${enable_mem_tracing_01}/g" \
    -e "s/@int_type_size@/${int_type_size}/g" \
    -e "s/@blas_int_type_size@/${blas_int_type_size}/g" \
    -e "s/@enable_blas@/${enable_blas_01}/g" \
    -e "s/@enable_cblas@/${enable_cblas_01}/g" \
    -e "s/@enable_mixed_dt@/${enable_mixed_dt_01}/g" \
    -e "s/@enable_mixed_dt_extra_mem@/${enable_mixed_dt_extra_mem_01}/g" \
    -e "s/@enable_sup_handling@/${enable_sup_handling_01}/g" \
    -e "s/@enable_memkind@/${enable_memkind_01}/g" \
    -e "s/@enable_trsm_preinversion@/${enable_trsm_preinversion_01}/g" \
    -e "s/@enable_aocl_dynamic@/${enable_aocl_dynamic_01}/g" \
    -e "s/@enable_pragma_omp_simd@/${enable_pragma_omp_simd_01}/g" \
    -e "s/@enable_sandbox@/${enable_sandbox_01}/g" \
    -e "s/@enable_shared@/${enable_shared_01}/g" \
    -e "s/@complex_return_intel@/${complex_return_intel01}/g" \
    -e "s/@disable_blis_arch_type@/${disable_blis_arch_type_01}/g" \
    -e "s/@rename_blis_arch_type@/${rename_blis_arch_type}/g" \
  > "${bli_config_h_out_path}"
|
|
|
|
# -- Instantiate bli_addon.h file from template ----------------------------
|
|
|
|
# Generate bli_addon_h_out_path from the bli_addon_h_in_path template.
# perl handles the addon #include list because the version of sed used on
# OS X is old and does not handle the '\n' sequences embedded in
# ${addon_list_includes} intuitively; sed handles the plain 0/1 flag.
echo "${script_name}: creating ${bli_addon_h_out_path} from ${bli_addon_h_in_path}"
cat "${bli_addon_h_in_path}" |
  perl -pe "s/\@addon_list_includes\@/${addon_list_includes}/g" |
  sed -e "s/@enable_addons@/${enable_addons_01}/g" \
    > "${bli_addon_h_out_path}"
|
|
|
|
# -- Create top-level object directories -----------------------------------
|
|
|
|
# Create the per-configuration obj, lib and include directory trees (if
# they do not already exist). Every path handed to mkdir is quoted so that
# build locations containing whitespace are created as a single directory
# rather than being split into several arguments.

# Root of the object tree for this configuration.
base_obj_dirpath="${obj_dirpath}/${config_name}"

echo "${script_name}: creating ${base_obj_dirpath}"
mkdir -p "${base_obj_dirpath}"

# Object directories for each sub-configuration.
obj_config_dirpath="${base_obj_dirpath}/${config_dir}"

mkdir -p "${obj_config_dirpath}"
for conf in ${config_list}; do
  echo "${script_name}: creating ${obj_config_dirpath}/${conf}"
  mkdir -p "${obj_config_dirpath}/${conf}"
done

# Object directories for each optimized kernel set.
obj_kernels_dirpath="${base_obj_dirpath}/${kernels_dir}"

mkdir -p "${obj_kernels_dirpath}"
for kern in ${kernel_list}; do
  echo "${script_name}: creating ${obj_kernels_dirpath}/${kern}"
  mkdir -p "${obj_kernels_dirpath}/${kern}"
done

# Object directories for the reference kernels, one per sub-configuration.
obj_refkern_dirpath="${base_obj_dirpath}/${refkern_dir}"

mkdir -p "${obj_refkern_dirpath}"
for conf in ${config_list}; do
  echo "${script_name}: creating ${obj_refkern_dirpath}/${conf}"
  mkdir -p "${obj_refkern_dirpath}/${conf}"
done

# Object directory for the aocldtl sources.
obj_aocldtl_dirpath="${base_obj_dirpath}/${aocldtl_dir}"

echo "${script_name}: creating ${obj_aocldtl_dirpath}"
mkdir -p "${obj_aocldtl_dirpath}"

# Object directory for the framework sources.
obj_frame_dirpath="${base_obj_dirpath}/${frame_dir}"

echo "${script_name}: creating ${obj_frame_dirpath}"
mkdir -p "${obj_frame_dirpath}"

# Object directories for addons, only when addons were requested.
if [ -n "${addon_flag}" ]; then

  obj_addon_dirpath="${base_obj_dirpath}/${addon_dir}"

  for addon in ${addon_list}; do
    echo "${script_name}: creating ${obj_addon_dirpath}/${addon}"
    mkdir -p "${obj_addon_dirpath}/${addon}"
  done
fi

# Object directory for the sandbox, only when a sandbox was requested.
if [ -n "${sandbox_flag}" ]; then

  obj_sandbox_dirpath="${base_obj_dirpath}/${sandbox_dir}"

  echo "${script_name}: creating ${obj_sandbox_dirpath}/${sandbox}"
  mkdir -p "${obj_sandbox_dirpath}/${sandbox}"
fi

# Object directory for the BLAS test drivers.
obj_blastest_dirpath="${base_obj_dirpath}/${blastest_dir}"

echo "${script_name}: creating ${obj_blastest_dirpath}"
mkdir -p "${obj_blastest_dirpath}"

# Object directory for the testsuite.
obj_testsuite_dirpath="${base_obj_dirpath}/${testsuite_dir}"

echo "${script_name}: creating ${obj_testsuite_dirpath}"
mkdir -p "${obj_testsuite_dirpath}"

# Create lib directory (if it does not already exist).
base_lib_dirpath="${lib_dirpath}/${config_name}"

echo "${script_name}: creating ${base_lib_dirpath}"
mkdir -p "${base_lib_dirpath}"

# Create include directory (if it does not already exist).
base_include_dirpath="${include_dirpath}/${config_name}"

echo "${script_name}: creating ${base_include_dirpath}"
mkdir -p "${base_include_dirpath}"
|
|
|
|
|
|
# -- Mirror source directory hierarchies to object directories -------------
|
|
|
|
# Combine the config_list with the config_name and then remove duplicates.
|
|
config_list_plus_name=$(rm_duplicate_words "${config_list} ${config_name}")
|
|
|
|
# Mirror each of the sub-configuration directories to the object directory.
|
|
for conf in ${config_list_plus_name}; do
|
|
|
|
echo "${script_name}: mirroring ${config_dirpath}/${conf} to ${obj_config_dirpath}/${conf}"
|
|
${mirror_tree_sh} "${config_dirpath}/${conf}" "${obj_config_dirpath}/${conf}"
|
|
done
|
|
|
|
# Mirror optimized kernels source tree to its object sub-directory.
|
|
# We perform the mirroring on each configuration/kernel sub-directory
|
|
# within 'kernels'.
|
|
for kern in ${kernel_list}; do
|
|
|
|
# Only mirror the optimized kernels source directory if it exists.
|
|
# There are occasions where one of the sub-configurations in the
|
|
# config_list does not correspond to a kernels sub-directory, such
|
|
# as when architecture B is so close to architecture A that B can
|
|
# use A's kernel source code unmodified (though perhaps with
|
|
# different blocksizes).
|
|
#if [ -d "${kernels_dirpath}/${conf}" ]; then
|
|
|
|
echo "${script_name}: mirroring ${kernels_dirpath}/${kern} to ${obj_kernels_dirpath}/${kern}"
|
|
${mirror_tree_sh} "${kernels_dirpath}/${kern}" "${obj_kernels_dirpath}/${kern}"
|
|
#else
|
|
# echo "${script_name}: mirroring ${kernels_dirpath}/${conf} skipped... directory does not exist"
|
|
#fi
|
|
done
|
|
|
|
# Mirror reference kernel source tree to its object sub-directory.
|
|
echo "${script_name}: mirroring ${refkern_dirpath} to ${obj_refkern_dirpath}"
|
|
${mirror_tree_sh} ${refkern_dirpath} ${obj_refkern_dirpath}
|
|
|
|
# Mirror the reference kernel source tree again, once per sub-configuration, into a per-configuration object sub-directory.
|
|
for conf in ${config_list}; do
|
|
|
|
echo "${script_name}: mirroring ${refkern_dirpath} to ${obj_refkern_dirpath}/${conf}"
|
|
${mirror_tree_sh} "${refkern_dirpath}" "${obj_refkern_dirpath}/${conf}"
|
|
done
|
|
|
|
# Mirror framework source tree to its object sub-directory.
|
|
echo "${script_name}: mirroring ${frame_dirpath} to ${obj_frame_dirpath}"
|
|
${mirror_tree_sh} ${frame_dirpath} ${obj_frame_dirpath}
|
|
|
|
# Mirror the AOCL DTL (aocldtl) source tree to its object sub-directory.
|
|
echo "${script_name}: mirroring ${aocldtl_dirpath} to ${obj_aocldtl_dirpath}"
|
|
${mirror_tree_sh} ${aocldtl_dirpath} ${obj_aocldtl_dirpath}
|
|
|
|
# Mirror the chosen addon source tree to its object sub-directory.
|
|
if [ -n "${addon_flag}" ]; then
|
|
|
|
for addon in ${addon_list}; do
|
|
|
|
echo "${script_name}: mirroring ${addon_dirpath}/${addon} to ${obj_addon_dirpath}/${addon}"
|
|
${mirror_tree_sh} "${addon_dirpath}/${addon}" "${obj_addon_dirpath}/${addon}"
|
|
done
|
|
fi
|
|
|
|
# Mirror the chosen sandbox source tree to its object sub-directory.
|
|
if [ -n "${sandbox_flag}" ]; then
|
|
|
|
echo "${script_name}: mirroring ${sandbox_dirpath}/${sandbox} to ${obj_sandbox_dirpath}/${sandbox}"
|
|
${mirror_tree_sh} "${sandbox_dirpath}/${sandbox}" "${obj_sandbox_dirpath}/${sandbox}"
|
|
fi
|
|
|
|
|
|
# -- Generate makefile fragments -------------------------------------------
|
|
|
|
clist_contains_cname=$(is_in_list "${config_name}" "${config_list}")
|
|
|
|
# If the config_list does not already contain the config_name (i.e.,
|
|
# if config_name is an umbrella family), generate makefiles in that
|
|
# directory. (In the next step, we will loop over the actual sub-
|
|
# configurations and create fragments there as well.)
|
|
if [ "${clist_contains_cname}" == "false" ]; then
|
|
|
|
echo "${script_name}: creating makefile fragments in ${obj_config_dirpath}/${config_name}"
|
|
${gen_make_frags_sh} \
|
|
-h -r -v0 \
|
|
-o ${script_name} \
|
|
-p 'CONFIG' \
|
|
${config_dirpath}/${config_name} \
|
|
${obj_config_dirpath}/${config_name} \
|
|
${gen_make_frags_dirpath}/fragment.mk \
|
|
${gen_make_frags_dirpath}/suffix_list \
|
|
${gen_make_frags_dirpath}/ignore_list
|
|
fi
|
|
|
|
# Generate makefile fragments for each of the sub-configurations present
|
|
# in the configuration list.
|
|
for conf in ${config_list}; do
|
|
|
|
echo "${script_name}: creating makefile fragments in ${obj_config_dirpath}/${conf}"
|
|
${gen_make_frags_sh} \
|
|
-h -r -v0 \
|
|
-o ${script_name} \
|
|
-p 'CONFIG' \
|
|
${config_dirpath}/${conf} \
|
|
${obj_config_dirpath}/${conf} \
|
|
${gen_make_frags_dirpath}/fragment.mk \
|
|
${gen_make_frags_dirpath}/suffix_list \
|
|
${gen_make_frags_dirpath}/ignore_list
|
|
done
|
|
|
|
# Generate makefile fragments for each of the kernel sets required by
|
|
# the configuration list (in the kernel list).
|
|
for kern in ${kernel_list}; do
|
|
|
|
echo "${script_name}: creating makefile fragments in ${obj_kernels_dirpath}/${kern}"
|
|
${gen_make_frags_sh} \
|
|
-h -r -v0 \
|
|
-o ${script_name} \
|
|
-p 'KERNELS' \
|
|
${kernels_dirpath}/${kern} \
|
|
${obj_kernels_dirpath}/${kern} \
|
|
${gen_make_frags_dirpath}/fragment.mk \
|
|
${gen_make_frags_dirpath}/suffix_list \
|
|
${gen_make_frags_dirpath}/ignore_list
|
|
done
|
|
|
|
# Generate makefile fragments in the reference kernels directory.
|
|
echo "${script_name}: creating makefile fragments in ${obj_refkern_dirpath}"
|
|
${gen_make_frags_sh} \
|
|
-h -r -v0 \
|
|
-o ${script_name} \
|
|
-p 'REFKERN' \
|
|
${refkern_dirpath} \
|
|
${obj_refkern_dirpath} \
|
|
${gen_make_frags_dirpath}/fragment.mk \
|
|
${gen_make_frags_dirpath}/suffix_list \
|
|
${gen_make_frags_dirpath}/ignore_list
|
|
|
|
# Generate makefile fragments in the DTL directory.
|
|
echo "${script_name}: creating makefile fragments in ${obj_aocldtl_dirpath}"
|
|
${gen_make_frags_sh} \
|
|
-h -r -v0 \
|
|
-o ${script_name} \
|
|
-p 'AOCLDTL' \
|
|
${aocldtl_dirpath} \
|
|
${obj_aocldtl_dirpath} \
|
|
${gen_make_frags_dirpath}/fragment.mk \
|
|
${gen_make_frags_dirpath}/suffix_list \
|
|
${gen_make_frags_dirpath}/ignore_list
|
|
|
|
# Generate makefile fragments in the framework directory.
|
|
echo "${script_name}: creating makefile fragments in ${obj_frame_dirpath}"
|
|
${gen_make_frags_sh} \
|
|
-h -r -v0 \
|
|
-o ${script_name} \
|
|
-p 'FRAME' \
|
|
${frame_dirpath} \
|
|
${obj_frame_dirpath} \
|
|
${gen_make_frags_dirpath}/fragment.mk \
|
|
${gen_make_frags_dirpath}/suffix_list \
|
|
${gen_make_frags_dirpath}/ignore_list
|
|
|
|
# Generate makefile fragments in the addon sub-directory.
|
|
if [ -n "${addon_flag}" ]; then
|
|
|
|
for addon in ${addon_list}; do
|
|
|
|
echo "${script_name}: creating makefile fragments in ${obj_addon_dirpath}/${addon}"
|
|
${gen_make_frags_sh} \
|
|
-h -r -v0 \
|
|
-o ${script_name} \
|
|
-p 'ADDON' \
|
|
${addon_dirpath}/${addon} \
|
|
${obj_addon_dirpath}/${addon} \
|
|
${gen_make_frags_dirpath}/fragment.mk \
|
|
${gen_make_frags_dirpath}/suffix_list \
|
|
${gen_make_frags_dirpath}/ignore_list
|
|
done
|
|
fi
|
|
|
|
|
|
# Generate makefile fragments in the sandbox sub-directory.
|
|
if [ -n "${sandbox_flag}" ]; then
|
|
|
|
echo "${script_name}: creating makefile fragments in ${obj_sandbox_dirpath}/${sandbox}"
|
|
${gen_make_frags_sh} \
|
|
-h -r -v0 \
|
|
-o ${script_name} \
|
|
-p 'SANDBOX' \
|
|
${sandbox_dirpath}/${sandbox} \
|
|
${obj_sandbox_dirpath}/${sandbox} \
|
|
${gen_make_frags_dirpath}/fragment.mk \
|
|
${gen_make_frags_dirpath}/suffix_list \
|
|
${gen_make_frags_dirpath}/ignore_list
|
|
fi
|
|
|
|
|
|
# -- Handle out-of-tree builds ---------------------------------------------
|
|
|
|
# Under some circumstances, we need to create some symbolic links to
|
|
# properly handle out-of-tree builds.
|
|
if [ "${configured_oot}" = "yes" ]; then
|
|
|
|
# If 'Makefile' symlink does not already exist in the current
|
|
# directory, create a symbolic link to it. If one does exist, we
|
|
# use -f to force creation of a new link.
|
|
if [ ! -e "./Makefile" ]; then
|
|
|
|
echo "${script_name}: creating symbolic link to Makefile."
|
|
ln -s "${dist_path}/Makefile"
|
|
|
|
elif [ -h "./Makefile" ]; then
|
|
echo "${script_name}: symbolic link to Makefile already exists; forcing creation of new link."
|
|
ln -sf "${dist_path}/Makefile"
|
|
else
|
|
echo "${script_name}: Non-symbolic link file or directory 'Makefile' blocks creation of symlink."
|
|
echo "${script_name}: *** Please remove this entity and re-run configure."
|
|
exit 1
|
|
fi
|
|
|
|
# If 'common.mk' symlink does not already exist in the current
|
|
# directory, create a symbolic link to it. If one does exist, we
|
|
# use -f to force creation of a new link.
|
|
if [ ! -e "./common.mk" ]; then
|
|
|
|
echo "${script_name}: creating symbolic link to common.mk."
|
|
ln -s "${dist_path}/common.mk"
|
|
|
|
elif [ -h "./common.mk" ]; then
|
|
echo "${script_name}: symbolic link to common.mk already exists; forcing creation of new link."
|
|
ln -sf "${dist_path}/common.mk"
|
|
else
|
|
echo "${script_name}: Non-symbolic link file or directory 'common.mk' blocks creation of symlink."
|
|
echo "${script_name}: *** Please remove this entity and re-run configure."
|
|
exit 1
|
|
fi
|
|
|
|
# If 'config' symlink does not already exist in the current
|
|
# directory, create a symbolic link to it. If one does exist, we
|
|
# use -f to force creation of a new link.
|
|
if [ ! -e "./config" ]; then
|
|
|
|
echo "${script_name}: creating symbolic link to 'config' directory."
|
|
ln -s "${dist_path}/config"
|
|
|
|
elif [ -h "./config" ]; then
|
|
echo "${script_name}: symbolic link to 'config' directory already exists; forcing creation of new link."
|
|
ln -sf "${dist_path}/config"
|
|
else
|
|
echo "${script_name}: Non-symbolic link file or directory 'config' blocks creation of symlink."
|
|
echo "${script_name}: *** Please remove this entity and re-run configure."
|
|
exit 1
|
|
fi
|
|
|
|
echo "${script_name}: configured to build outside of source distribution."
|
|
else
|
|
|
|
echo "${script_name}: configured to build within top-level directory of source distribution."
|
|
fi
|
|
|
|
if [ "${warn_user_generic}" = "1" ]; then
|
|
|
|
echo "${script_name}: "
|
|
echo "${script_name}: *** Unable to automatically detect hardware type! ***"
|
|
echo "${script_name}: "
|
|
echo "${script_name}: NOTE: configure was unable to identify a subconfiguration"
|
|
echo "${script_name}: optimized for your hardware. As a result, the 'generic'"
|
|
echo "${script_name}: subconfiguration (with low-performance reference kernels)"
|
|
echo "${script_name}: will be used. For support, please open an issue on GitHub"
|
|
echo "${script_name}: at https://github.com/flame/blis/issues."
|
|
echo "${script_name}: "
|
|
fi
|
|
|
|
# Exit peacefully.
|
|
return 0
|
|
}
|
|
|
|
|
|
# Script entry point: call main() with all command-line arguments; the quoted "$@" passes each argument through as its own word.
|
|
main "$@"
|
|
|