Files
blis/configure
Balasubramanian, Vignesh 73911d5990 Updates to the build systems(CMake and Make) for LPGEMM compilation (#303)
- The current build systems have the following behaviour
  with regards to building "aocl_gemm" addon codebase(LPGEMM)
  when giving "amdzen" as the target architecture(fat-binary)
  - Make:  Attempts to compile LPGEMM kernels using the same
                compiler flags that the makefile fragments set for BLIS
                kernels, based on the compiler version.
  - CMake: With presets, it always enables the addon compilation
                 unless explicitly specified with the ENABLE_ADDON variable.

- This poses a bug with older compilers, owing to them not supporting
  BF16 or INT8 intrinsic compilation.

- This patch adds the functionality to check for GCC and Clang compiler versions,
  and disables LPGEMM compilation if GCC < 11.2 or Clang < 12.0.

- Make:  Updated the configure script to check for the compiler version
              if the addon is specified.
  CMake: Updated the main CMakeLists.txt to check for the compiler version
               if the addon is specified, and to also force-update the associated
               cache variable update. Also updated kernels/CMakeLists.txt to
               check if "aocl_gemm" remains in the ENABLE_ADDONS list after
               all the checks in the previous layers.

AMD-Internal: [CPUPL-7850]

Signed-off by : Vignesh Balasubramanian <Vignesh.Balasubramanian@amd.com>
2026-01-16 19:39:55 +05:30

4208 lines
141 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2014, The University of Texas at Austin
# Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name(s) of the copyright holder(s) nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
#
# -- Helper functions ----------------------------------------------------------
#
print_usage()
{
  # Print the full usage/help text for the configure script to stdout and
  # then terminate the script.
  #
  # Globals read:
  #   version          - version string shown in the banner
  #   version_filepath - file whose contents are used as the version string
  #                      when 'version' is empty
  #   script_name      - name of this script, shown in the banner and in
  #                      the usage synopsis
  #   prefix_def       - default installation prefix, shown in the
  #                      description of --prefix
  # Globals written:
  #   version          - set from ${version_filepath} if it was empty
  # Exit status:
  #   Always exits the shell with status 1 (it never returns).

  # Use the version string in the 'version' file since we don't have
  # the patched version string yet.
  if [ -z "${version}" ]; then
    version=$(cat "${version_filepath}")
  fi
  # Echo usage info.
  echo " "
  echo " ${script_name} (BLIS ${version})"
  #echo " "
  #echo " BLIS ${version}"
  echo " "
  echo " Configure BLIS's build system for compilation using a specified"
  echo " configuration directory."
  echo " "
  echo " Usage:"
  echo " "
  echo " ${script_name} [options] [env. vars.] confname"
  echo " "
  echo " Arguments:"
  echo " "
  echo " confname The name of the sub-directory inside of the 'config'"
  echo " directory containing the desired BLIS configuration."
  echo " Note that confname MUST be specified; if it is not,"
  echo " configure will complain. To build a completely generic"
  echo " implementation, use the 'generic' configuration"
  echo " "
  echo " Options:"
  echo " "
  echo " -p PREFIX, --prefix=PREFIX"
  echo " "
  echo " The common installation prefix for all files. If given,"
  echo " this option effectively implies:"
  echo " --libdir=EXECPREFIX/lib"
  echo " --includedir=PREFIX/include"
  echo " --sharedir=PREFIX/share"
  echo " where EXECPREFIX defaults to PREFIX. If this option is"
  echo " not given, PREFIX defaults to '${prefix_def}'. If PREFIX"
  echo " refers to a directory that does not exist, it will be"
  echo " created."
  echo " "
  echo " --exec-prefix=EXECPREFIX"
  echo " "
  echo " The installation prefix for libraries. Specifically, if"
  echo " given, this option effectively implies:"
  echo " --libdir=EXECPREFIX/lib"
  echo " If not given, EXECPREFIX defaults to PREFIX, which may be"
  echo " modified by the --prefix option. If EXECPREFIX refers to"
  echo " a directory that does not exist, it will be created."
  echo " "
  echo " --libdir=LIBDIR"
  echo " "
  echo " The path to which make will install libraries. If not"
  echo " given, LIBDIR defaults to PREFIX/lib. If LIBDIR refers to"
  echo " a directory that does not exist, it will be created."
  echo " "
  echo " --includedir=INCDIR"
  echo " "
  echo " The path to which make will install development header"
  echo " files. If not given, INCDIR defaults to PREFIX/include."
  echo " If INCDIR refers to a directory that does not exist, it"
  echo " will be created."
  echo " "
  echo " --sharedir=SHAREDIR"
  echo " "
  echo " The path to which make will install makefile fragments containing"
  echo " make variables determined by configure (e.g. CC, CFLAGS,"
  echo " and LDFLAGS). These files allow certain BLIS makefiles,"
  echo " such as those in the examples or testsuite directories, to"
  echo " operate on an installed copy of BLIS rather than a local"
  echo " (and possibly uninstalled) copy. If not given, SHAREDIR"
  echo " defaults to PREFIX/share. If SHAREDIR refers to a"
  echo " directory that does not exist, it will be created."
  echo " "
  echo " --enable-verbose-make, --disable-verbose-make"
  echo " "
  echo " Enable (disabled by default) verbose compilation output"
  echo " during make."
  echo " "
  echo " --enable-arg-max-hack --disable-arg-max-hack"
  echo " "
  echo " Enable (disabled by default) build system logic that"
  echo " will allow archiving/linking the static/shared library"
  echo " even if the command plus command line arguments exceeds"
  echo " the operating system limit (ARG_MAX)."
  echo " "
  echo " -d DEBUG, --enable-debug[=DEBUG]"
  echo " "
  echo " Enable debugging symbols in the library. If argument"
  echo " DEBUG is given as 'opt', then optimization flags are"
  echo " kept in the framework, otherwise optimization is"
  echo " turned off."
  echo " "
  echo " --disable-static, --enable-static"
  echo " "
  echo " Disable (enabled by default) building BLIS as a static"
  echo " library. If the static library build is disabled, the"
  echo " shared library build must remain enabled."
  echo " "
  echo " --disable-shared, --enable-shared"
  echo " "
  echo " Disable (enabled by default) building BLIS as a shared"
  echo " library. If the shared library build is disabled, the"
  echo " static library build must remain enabled."
  echo " "
  echo " --enable-rpath, --disable-rpath"
  echo " "
  echo " Enable (disabled by default) setting an install_name for"
  echo " dynamic libraries on macOS which starts with @rpath rather"
  echo " than the absolute install path."
  echo " "
  echo " -e SYMBOLS, --export-shared[=SYMBOLS]"
  echo " "
  echo " Specify the subset of library symbols that are exported"
  echo " within a shared library. Valid values for SYMBOLS are:"
  echo " 'public' (the default) and 'all'. By default, only"
  echo " functions and variables that belong to public APIs are"
  echo " exported in shared libraries. However, the user may"
  echo " instead export all symbols in BLIS, even those that were"
  echo " intended for internal use only. Note that the public APIs"
  echo " encompass all functions that almost any user would ever"
  echo " want to call, including the BLAS/CBLAS compatibility APIs"
  echo " as well as the basic and expert interfaces to the typed"
  echo " and object APIs that are unique to BLIS. Also note that"
  echo " changing this option to 'all' will have no effect in some"
  echo " environments, such as when compiling with clang on"
  echo " Windows."
  echo " "
  echo " -t MODEL, --enable-threading[=MODEL], --disable-threading"
  echo " "
  echo " Enable threading in the library, using threading model"
  echo " MODEL={openmp,pthreads,no}. If MODEL=no or "
  echo " --disable-threading is specified, threading will be"
  echo " disabled. The default is 'no'."
  echo " "
  echo " --enable-system, --disable-system"
  echo " "
  echo " Enable conventional operating system support, such as"
  echo " pthreads for thread-safety. The default state is enabled."
  echo " However, in rare circumstances you may wish to configure"
  echo " BLIS for use with a minimal or nonexistent operating"
  echo " system (e.g. hardware simulators). In these situations,"
  echo " --disable-system may be used to jettison all compile-time"
  echo " and link-time dependencies outside of the standard C"
  echo " library. When disabled, this option also forces the use"
  echo " of --disable-threading."
  echo " "
  echo " --disable-pba-pools, --enable-pba-pools"
  echo " --disable-sba-pools, --enable-sba-pools"
  echo " "
  echo " Disable (enabled by default) use of internal memory pools"
  echo " within the packing block allocator (pba) and/or the small"
  echo " block allocator (sba). The former is used to allocate"
  echo " memory used to pack submatrices while the latter is used"
  echo " to allocate control/thread tree nodes and thread"
  echo " communicators. Both allocations take place in the context"
  echo " of level-3 operations. When the pba is disabled, the"
  echo " malloc()-like function specified by BLIS_MALLOC_POOL is"
  echo " called on-demand whenever a packing block is needed, and"
  echo " when the sba is disabled, the malloc()-like function"
  echo " specified by BLIS_MALLOC_INTL is called whenever a small"
  echo " block is needed, with the two allocators calling free()-"
  echo " like functions BLIS_FREE_POOL and BLIS_FREE_INTL,"
  echo " respectively when blocks are released. When enabled,"
  echo " either or both pools are populated via the same functions"
  echo " mentioned previously, and henceforth blocks are checked"
  echo " out and in. The library quickly reaches a state in which"
  echo " it no longer needs to call malloc() or free(), even"
  echo " across many separate level-3 operation invocations."
  echo " "
  echo " --enable-mem-tracing, --disable-mem-tracing"
  echo " "
  echo " Enable (disabled by default) output to stdout that traces"
  echo " the allocation and freeing of memory, including the names"
  echo " of the functions that triggered the allocation/freeing."
  echo " Enabling this option WILL NEGATIVELY IMPACT PERFORMANCE."
  echo " Please use only for informational/debugging purposes."
  echo " "
  echo " -i SIZE, --int-size=SIZE"
  echo " "
  echo " Set the size (in bits) of internal BLIS integers and"
  echo " integer types used in native BLIS interfaces. The"
  echo " default integer type size is architecture dependent."
  echo " (Hint: You can always find this value printed at the"
  echo " beginning of the testsuite output.)"
  echo " "
  echo " -b SIZE, --blas-int-size=SIZE"
  echo " "
  echo " Set the size (in bits) of integer types in external"
  echo " BLAS and CBLAS interfaces, if enabled. The default"
  echo " integer type size used in BLAS/CBLAS is 32 bits."
  echo " "
  echo " --disable-blas, --enable-blas"
  echo " "
  echo " Disable (enabled by default) building the BLAS"
  echo " compatibility layer."
  echo " "
  echo " --enable-cblas, --disable-cblas"
  echo " "
  echo " Enable (disabled by default) building the CBLAS"
  echo " compatibility layer. This automatically enables the"
  echo " BLAS compatibility layer as well."
  echo " "
  echo " --disable-mixed-dt, --enable-mixed-dt"
  echo " "
  echo " Disable (enabled by default) support for mixing the"
  echo " storage domain and/or storage precision of matrix"
  echo " operands for the gemm operation, as well as support"
  echo " for computing in a precision different from one or"
  echo " both of matrices A and B."
  echo " "
  echo " --disable-mixed-dt-extra-mem, --enable-mixed-dt-extra-mem"
  echo " "
  echo " Disable (enabled by default) support for additional"
  echo " mixed datatype optimizations that require temporarily"
  echo " allocating extra memory--specifically, a single m x n"
  echo " matrix (per application thread) whose storage datatype"
  echo " is equal to the computation datatype. This option may"
  echo " only be enabled when mixed domain/precision support is"
  echo " enabled."
  echo " "
  echo " --disable-mnk1-matrix, --enable-mnk1-matrix"
  echo " "
  echo " Disable (enabled by default) handling of matrix problem"
  echo " where M, N or K = 1 via separate code branches. When disabled,"
  echo " these operations will be performed by gemm rather than gemv"
  echo " or other optimized implementations."
  echo " "
  echo " --disable-tiny-matrix, --enable-tiny-matrix"
  echo " "
  echo " Disable (enabled by default) handling of tiny"
  echo " matrix problems via tiny code branches. When disabled,"
  echo " these tiny level-3 operations will be performed by"
  echo " the conventional implementation, which is optimized for"
  echo " medium and large problems. Note that what qualifies as"
  echo " \"tiny\" depends on thresholds that may vary by sub-"
  echo " configuration."
  echo " Currently only of relevance on configs that include"
  echo " AMD Zen sub-configs"
  echo " "
  echo " --disable-small-matrix, --enable-small-matrix"
  echo " "
  echo " Disable (enabled by default) handling of small/skinny"
  echo " matrix problems via small code branches. When disabled,"
  echo " these small/skinny level-3 operations will be performed by"
  echo " the conventional implementation, which is optimized for"
  echo " medium and large problems. Note that what qualifies as"
  echo " \"small\" depends on thresholds that may vary by sub-"
  echo " configuration."
  echo " Currently only of relevance on configs that include"
  echo " AMD Zen sub-configs"
  echo " "
  echo " --disable-sup-handling, --enable-sup-handling"
  echo " "
  echo " Disable (enabled by default) handling of small/skinny"
  echo " matrix problems via SUP code branches. When disabled,"
  echo " these small/skinny level-3 operations will be performed by"
  echo " the conventional implementation, which is optimized for"
  echo " medium and large problems. Note that what qualifies as"
  echo " \"SUP\" depends on thresholds that may vary by sub-"
  echo " configuration."
  echo " "
  echo " --disable-small-matrix-trsm, --enable-small-matrix-trsm"
  echo " "
  echo " Disable (enabled by default) handling of small/skinny"
  echo " TRSM problems via small code branches. When disabled,"
  echo " these small/skinny level-3 operations will be performed by"
  echo " the conventional implementation, which is optimized for"
  echo " medium and large problems. Note that what qualifies as"
  echo " \"small\" depends on thresholds that may vary by sub-"
  echo " configuration."
  echo " Currently only of relevance on configs that include"
  echo " AMD Zen sub-configs"
  echo " "
  echo " -a NAME --enable-addon=NAME"
  echo " "
  echo " Enable the code provided by an addon. An addon consists"
  echo " of a separate directory of code that provides additional"
  echo " APIs, implementations, and/or operations that would"
  echo " otherwise not be present within a build of BLIS. This"
  echo " option may be used multiple times to specify the inclusion"
  echo " of multiple addons. By default, no addons are enabled."
  echo " "
  echo " -s NAME --enable-sandbox=NAME"
  echo " "
  echo " Enable a separate sandbox implementation of gemm. This"
  echo " option disables BLIS's conventional gemm implementation"
  echo " (which shares common infrastructure with other level-3"
  echo " operations) and instead compiles and uses the code in"
  echo " the NAME directory, which is expected to be a sub-"
  echo " directory of 'sandbox'. By default, no sandboxes are"
  echo " enabled."
  echo " "
  echo " --with-memkind, --without-memkind"
  echo " "
  echo " Forcibly enable or disable the use of libmemkind's"
  echo " hbw_malloc() and hbw_free() as substitutes for malloc()"
  echo " and free(), respectively, when allocating memory for"
  echo " BLIS's memory pools, which are used to manage buffers"
  echo " into which matrices are packed. The default behavior"
  echo " for this option is environment-dependent; if configure"
  echo " detects the presence of libmemkind, libmemkind is used"
  echo " by default, and otherwise it is not used by default."
  echo " "
  echo " -r METHOD, --thread-part-jrir=METHOD"
  echo " "
  echo " Request a method of assigning micropanels to threads in"
  echo " the JR and IR loops. Valid values for METHOD are 'slab'"
  echo " and 'rr'. Using 'slab' assigns (as much as possible)"
  echo " contiguous regions of micropanels to each thread while"
  echo " using 'rr' assigns micropanels to threads in a round-"
  echo " robin fashion. The chosen method also applies during"
  echo " the packing of A and B. The default method is 'slab'."
  echo " NOTE: Specifying this option constitutes a request,"
  echo " which may be ignored in select situations if the"
  echo " implementation has a good reason to do so."
  echo " "
  echo " --disable-trsm-preinversion, --enable-trsm-preinversion"
  echo " "
  echo " Disable (enabled by default) pre-inversion of triangular"
  echo " matrix diagonals when performing trsm. When pre-inversion"
  echo " is enabled, diagonal elements are inverted outside of the"
  echo " microkernel (e.g. during packing) so that the microkernel"
  echo " can use multiply instructions. When disabled, division"
  echo " instructions are used within the microkernel. Executing"
  echo " these division instructions within the microkernel will"
  echo " incur a performance penalty, but numerical robustness will"
  echo " improve for certain cases involving denormal numbers that"
  echo " would otherwise result in overflow in the pre-inverted"
  echo " values."
  echo " "
  echo " --force-version=STRING"
  echo " "
  echo " Force configure to use an arbitrary version string"
  echo " STRING. This option may be useful when repackaging"
  echo " custom versions of BLIS by outside organizations."
  echo " "
  echo " -c, --show-config-lists"
  echo " "
  echo " Print the config and kernel lists, and kernel-to-config"
  echo " map after they are read from file. This can be useful"
  echo " when debugging certain configuration issues, and/or as"
  echo " a sanity check to make sure these lists are constituted"
  echo " as expected."
  echo " "
  echo " --complex-return=gnu|intel"
  echo " "
  echo " Specify the way in which complex numbers are returned"
  echo " from Fortran functions, either \"gnu\" (return in"
  echo " registers) or \"intel\" (return via hidden argument)."
  echo " If not specified and the environment variable FC is set,"
  echo " attempt to determine the return type from the compiler."
  echo " Otherwise, the default is \"gnu\"."
  echo " "
  echo " --enable-aocl-dynamic, --disable-aocl-dynamic"
  echo " "
  echo " Disable (Enabled by default) dynamic selection of number of"
  echo " threads used to solve the given problem."
  echo " Range of optimum number of threads will be [1, num_threads],"
  echo " where \"num_threads\" is number of threads set by the application."
  echo " Num_threads is derived from either environment variable"
  echo " OMP_NUM_THREADS or BLIS_NUM_THREADS or bli_set_num_threads() API."
  echo " "
  echo " --enable-security-flags, --disable-security-flags"
  echo " "
  echo " Disable (Enabled by default) addition of compiler and linker"
  echo " security hardening flags (e.g. -D_FORTIFY_SOURCE=2 -fstack-protector-strong"
  echo " and -Wl,-z,relro -Wl,-z,now on ELF platforms)."
  echo " "
  echo " --enable-blis-arch-type, --disable-blis-arch-type"
  echo " "
  echo " Disable support for AOCL_ENABLE_INSTRUCTIONS, BLIS_ARCH_TYPE and"
  echo " BLIS_MODEL_TYPE environment variables, which allows user to select"
  echo " architecture specific code path and optimizations at runtime."
  echo " If disabled, in builds with multiple code paths, BLIS"
  echo " will still select path and optimizations automatically."
  echo " Default: Enabled in builds with multiple code paths, else disabled."
  echo " "
  echo " --rename-blis-arch-type=STRING"
  echo " "
  echo " Change environment variable used to select architecture specific"
  echo " code path from BLIS_ARCH_TYPE to STRING"
  echo " "
  echo " --rename-blis-model-type=STRING"
  echo " "
  echo " Change environment variable used to select architecture model specific"
  echo " optimizations from BLIS_MODEL_TYPE to STRING"
  echo " "
  echo " --enable-aocl-dtl=OPTION, --disable-aocl-dtl"
  echo " "
  echo " Enable DTL tracing and/or logging functionality"
  echo " OPTION={all,trace,log,off}. The default is 'off'."
  echo " Unrecognized options will be treated as 'off'."
  echo " Details of the options:"
  echo " * logging records basic information for each BLAS"
  echo " call, with some APIs including timing information."
  echo " * tracing records more detailed information on"
  echo " the call stack within the BLAS APIs, and is mostly"
  echo " of use for BLIS developers. The level of detail is"
  echo " controlled by --aocl-dtl-trace-level option. More detailed"
  echo " tracing will significantly increase API runtime."
  echo " "
  echo " --aocl-dtl-trace-level=OPTION"
  echo " "
  echo " Sets the level of detail in tracing, see the description"
  echo " in aocl_dtl/aocldtlcf.h for more details."
  echo " OPTION=1..10, used to set different levels of detail in"
  echo " tracing. Default value is 5."
  echo " "
  echo " -q, --quiet Suppress informational output. By default, configure"
  echo " is verbose. (NOTE: -q is not yet implemented)"
  echo " "
  echo " -h, --help Output this information and quit."
  echo " "
  echo " Environment Variables:"
  echo " "
  echo " CC Specifies the C compiler to use."
  echo " CXX Specifies the C++ compiler to use (sandbox only)."
  echo " FC Specifies the Fortran compiler to use (only to determine --complex-return)."
  echo " RANLIB Specifies the ranlib executable to use."
  echo " AR Specifies the archiver to use."
  echo " CFLAGS Specifies additional compiler flags to use (prepended)."
  echo " LDFLAGS Specifies additional linker flags to use (prepended)."
  echo " LIBPTHREAD Pthreads library to use."
  echo " PYTHON Specifies the python interpreter to use."
  echo " "
  echo " Environment variables may also be specified as command line"
  echo " options, e.g.:"
  echo " "
  echo " ./configure [options] CC=gcc haswell"
  echo " "
  echo " Note that not all compilers are compatible with a given"
  echo " configuration."
  echo " "
  # Exit with non-zero exit status
  exit 1
}
query_array()
{
  # Emulate an associative-array lookup using ordinary variables.
  #
  # Arguments:
  #   $1 - name of the emulated array (variable-name prefix)
  #   $2 - key to look up
  # Outputs:
  #   Writes the value of the variable named "<$1>_<$2>" to stdout,
  #   followed by a newline. An unset variable yields an empty line.
  local arr key var_name
  arr="$1"
  key="$2"
  var_name="${arr}_${key}"
  # Use printf rather than echo so that a stored value such as "-n" or
  # "-e" is printed verbatim instead of being taken as an echo option.
  printf '%s\n' "${!var_name}"
}
assign_key_value()
{
  # Store a value in an emulated associative array.
  #
  # Arguments:
  #   $1 - name of the emulated array (variable-name prefix)
  #   $2 - key
  #   $3 - value to store
  # Effect:
  #   Assigns $3 to the variable named "<$1>_<$2>".
  #
  # The destination name always contains an underscore, so it can never
  # collide with the local 'dest' used to build it.
  local dest
  dest="${1}_${2}"
  printf -v "${dest}" '%s' "${3}"
}
#
# FGVZ: This commented-out function is being kept as an example of how
# to effectively "pass by reference" in bash. That is, pass the name of
# a variable, instead of its contents, and then let the function use the
# variable by prepending a $, at which time it can evaluate the string
# as if it were a literal variable occurrence.
#
#filteradd_to_list()
#{
# local dlist ditem list_c item_c is_blacklisted
#
# # Add $1 to the list identified by $2, but only if $1 is not
# # found in a blacklist.
#
# # Note: $2 can actually be a list of items.
# dlist=\$"$1"
# ditem=\$"$2"
#
# # Acquire the contents of $list and $item and store them in list_c
# # and item_c, respectively.
# list_c=$(eval "expr \"$dlist\" ")
# item_c=$(eval "expr \"$ditem\" ")
#
# # Iterate over $item_c in case it is actually multiple items.
# for cur_item in $item_c; do
#
# is_blacklisted=$(is_in_list "${cur_item}" "${config_blist}")
# if [ ${is_blacklisted} == "false" ]; then
#
# # If cur_item is not blacklisted, add it to list_c.
# list_c="${list_c} ${cur_item}"
# fi
# done
#
# # Update the argument.
# eval "$1=\"${list_c}\""
#}
pass_config_kernel_registries()
{
  # Make one pass over a configuration registry file and, on pass 1,
  # populate the emulated config and kernel registries.
  #
  # Arguments:
  #   $1 - path to the file containing the configuration registry
  #   $2 - pass number: "0" or "1". Pass 0 was designed to build the
  #        indirect config blacklist (indirect_blist) ONLY; it currently
  #        returns early (see below). Pass 1 populates the config and
  #        kernel registries.
  # Globals read:
  #   config_blist   - list of blacklisted configurations
  #   indirect_blist - list of indirectly blacklisted configurations
  # Globals written:
  #   indirect_blist - reset to "" on pass 0
  #   config_registry_<cname>, kernel_registry_<cname> - set on pass 1
  #     via assign_key_value
  # Helpers called (defined outside this function):
  #   canonicalize_ws, is_singleton_family, is_in_list, remove_from_list,
  #   assign_key_value
  local filename passnum
  local all_blist
  local line curline list item config kernels
  local cname clist klist
  local -a items

  filename="$1"
  passnum="$2"

  # Initialize a list of indirect blacklisted configurations for the
  # current iteration. These are configurations that are invalidated by
  # the removal of blacklisted configurations. For example, if haswell
  # is registered as needing the 'haswell' and 'zen' kernel sets:
  #
  # haswell: haswell/haswell/zen
  #
  # and 'zen' was blacklisted because of the compiler version, then the
  # 'haswell' configuration must be omitted from the registry, as it no
  # longer has all of the kernel sets it was expecting.
  if [ "${passnum}" == "0" ]; then
    indirect_blist=""
  fi

  # For convenience, merge the original and indirect blacklists.
  # NOTE: During pass 0, all_blist is equal to config_blist, since
  # indirect_blist is still empty.
  all_blist="${config_blist} ${indirect_blist}"

  # Disable support for indirect blacklisting by returning early during
  # pass 0. See issue #214 for details [1]. Basically, I realized that
  # indirect blacklisting is not needed in the use case that I envisioned
  # in the real-life example above. If a subconfiguration such as haswell
  # is defined to require the zen kernel set, it implies that the zen
  # kernels can be compiled with haswell compiler flags. That is, just
  # because the zen subconfig (and its compiler flags) is blacklisted
  # does not mean that the haswell subconfig cannot compile the zen
  # kernels with haswell-specific flags.
  #
  # [1] https://github.com/flame/blis/issues/214
  #
  # NOTE: Because of this early return, every 'passnum == "0"' branch
  # below is currently unreachable. Those branches are retained so that
  # indirect blacklisting can be re-enabled by removing this return.
  if [ "${passnum}" == "0" ]; then
    return
  fi

  # 'read' (with the default IFS) strips leading and trailing whitespace
  # from each line. The '|| [ -n "${line}" ]' clause makes sure a final
  # line that lacks a terminating newline is still processed.
  while read -r line || [ -n "${line}" ]; do

    curline="${line}"

    # Remove everything after comment character '#'.
    curline=${curline%%#*}

    # We've stripped out leading whitespace and trailing comments. If
    # the line is now empty, then we can skip it altogether.
    if [ "x${curline}" = "x" ]; then
      continue
    fi

    # Read the config name and config list for the current line.
    cname=${curline%%:*}
    list=${curline##*:}

    # If we encounter a slash, it means the name of the configuration
    # and the kernel set needed by that configuration are different.
    if [[ "${list}" == *[/]* ]]; then

      klist=""
      clist=""

      # Split the list into whitespace-separated words (without
      # pathname expansion) and handle each compound word on its own.
      read -r -a items <<< "${list}"

      for item in "${items[@]}"; do

        # The sub-configuration name is always the first sub-word in
        # the slash-separated compound word.
        config=${item%%/*}

        # Delete the sub-configuration name from the front of the
        # word, leaving the slash-separated kernel names (or just
        # the kernel name, if there is only one). A word with no
        # slash is left unchanged, so it names both the config and
        # its kernel set.
        kernels=${item#*/}

        # Replace the slashes with spaces to transform the string
        # into a space-separated list of kernel names.
        kernels=${kernels//\// }

        clist="${clist} ${config}"
        klist="${klist} ${kernels}"
      done
    else
      clist=${list}
      klist=${list}
    fi

    # Strip out whitespace from the config name and config/kernel list
    # on each line.
    cname=$(canonicalize_ws "${cname}")
    clist=$(canonicalize_ws "${clist}")
    klist=$(canonicalize_ws "${klist}")

    # Next, we prepare to:
    # - pass 0: inspect klist for blacklisted configurations, which may
    #   reveal configurations as needing to be indirectly blacklisted.
    # - pass 1: compare cname to the blacklists and commit clist/klist
    #   to their respective registries, as appropriate.

    # Handle singleton and umbrella configuration entries separately.
    if [ "$(is_singleton_family "${cname}" "${clist}")" == "true" ]; then

      # Singleton configurations/families.

      # Note: for singleton families, clist contains one item, which
      # always equals cname, but klist could contain more than one
      # item.

      # Only consider updating the indirect blacklist (pass 0) or
      # committing clist to the config registry (pass 1) if the
      # configuration name (cname) is not blacklisted.
      if [ "$(is_in_list "${cname}" "${all_blist}")" == "false" ]; then

        if [ "${passnum}" == "0" ]; then

          # Even if the cname isn't blacklisted, one of the requisite
          # kernels might be, so we need to check klist for blacklisted
          # items. If we find one, we must assume that the entire entry
          # must be thrown out. (Ideally, we would simply fall back to
          # reference code for the blacklisted kernels, but that is not
          # at all straightforward under the current configuration
          # system architecture.) Thus, we add cname to the indirect
          # blacklist.
          for item in ${klist}; do
            if [ "$(is_in_list "${item}" "${config_blist}")" == "true" ]; then
              indirect_blist="${indirect_blist} ${cname}"
              break
            fi
          done
        fi

        if [ "${passnum}" == "1" ]; then

          # Store the clist to the cname key of the config registry.
          assign_key_value "config_registry" "${cname}" "${clist}"
        fi
      fi

      # NOTE: The kernel registry entry is stored on pass 1 regardless
      # of whether cname is blacklisted.
      if [ "${passnum}" == "1" ]; then

        # Store the klist to the cname key of the kernel registry.
        assign_key_value "kernel_registry" "${cname}" "${klist}"
      fi

    else

      # Umbrella configurations/families.

      # First we check cname, which should generally not be blacklisted
      # for umbrella families, but we check anyway just to be safe.
      if [ "$(is_in_list "${cname}" "${all_blist}")" == "false" ]; then

        if [ "${passnum}" == "1" ]; then

          # Check each item in the clist and klist. (At this point,
          # clist == klist.) If any sub-config is blacklisted, we
          # omit it from clist and klist.
          for item in ${clist}; do
            if [ "$(is_in_list "${item}" "${all_blist}")" == "true" ]; then
              clist=$(remove_from_list "${item}" "${clist}")
              klist=$(remove_from_list "${item}" "${klist}")
            fi
          done

          # Store the config and kernel lists to entries that
          # corresponds to the config name.
          assign_key_value "config_registry" "${cname}" "${clist}"
          assign_key_value "kernel_registry" "${cname}" "${klist}"
        fi
      fi
    fi

  done < "${filename}"

  if [ "${passnum}" == "0" ]; then

    # Assign the final indirect blacklist (with whitespace removed).
    # The argument is quoted so the whole list is passed as a single
    # argument, like every other canonicalize_ws call in this function.
    indirect_blist="$(canonicalize_ws "${indirect_blist}")"
  fi
}
read_registry_file()
{
local filename
local clist klist
local iterate_again config
local cr_var mem mems_mem newclist
local kr_var ker kers_ker newklist
filename="$1"
# Execute an initial pass through the config_registry file so that
# we can accumulate a list of indirectly blacklisted configurations,
# if any.
pass_config_kernel_registries "${filename}" "0"
# Now that the indirect_blist has been created, make a second pass
# through the 'config_registry' file, this time creating the actual
# config and kernel registry data structures.
pass_config_kernel_registries "${filename}" "1"
# Now we must go back through the config_registry and substitute any
# configuration families with their constituents' members. Each time
# one of these substitutions occurs, we set a flag that causes us to
# make one more pass. (Substituting a singleton definition does not
# prompt additional iterations.) This process stops when a full pass
# does not result in any substitution.
iterate_again="1"
while [ "${iterate_again}" == "1" ]; do
iterate_again="0"
#for config in "${!config_registry[@]}"; do
for cr_var in ${!config_registry_*}; do
config=${cr_var##config_registry_}
clist=$(query_array "config_registry" ${config})
# The entries that define singleton families should never need
# any substitution.
if [ $(is_singleton_family "${config}" "${clist}") == "true" ]; then
continue
fi
#for mem in ${config_registry[$config]}; do
#for mem in ${!cr_var}; do
for mem in ${clist}; do
#mems_mem="${config_registry[${mem}]}"
mems_mem=$(query_array "config_registry" ${mem})
# If mems_mem is empty string, then mem was not found as a key
# in the config list associative array. In that case, we continue
# and will echo an error later in the script.
if [ "${mems_mem}" == "" ]; then
#echo " config for ${mem} is empty string! no entry in config list."
continue;
fi
if [ "${mem}" != "${mems_mem}" ]; then
#clist="${config_registry[$config]}"
clisttmp=$(query_array "config_registry" ${config})
# Replace the current config with its constituent config set,
# canonicalize whitespace, and then remove duplicate config
# set names, if they exist. Finally, update the config registry
# with the new config list.
# NOTE: WE must use substitute_words() rather than a simple sed
# expression because we need to avoid matching partial strings.
# For example, if clist above contains "foo bar barsk" and we use
# sed to substitute "bee boo" as the members of "bar", the
# result would (incorrectly) be "foo bee boo bee boosk",
# which would then get reduced, via rm_duplicate_words(), to
# "foo bee boo boosk".
#newclist=$(echo -e "${clist}" | sed -e "s/${mem}/${mems_mem}/g")
newclist=$(substitute_words "${mem}" "${mems_mem}" "${clisttmp}")
newclist=$(canonicalize_ws "${newclist}")
newclist=$(rm_duplicate_words "${newclist}")
#config_registry[${config}]=${newclist}
#printf -v "config_registry_${config}" %s "${newclist}"
assign_key_value "config_registry" "${config}" "${newclist}"
# Since we performed a substitution and changed the config
# list, mark the iteration flag to continue another round,
# but only if the config (mem) value is NOT present
# in the list of sub-configs. If it is present, then further
# substitution may not necessarily be needed this round.
if [ $(is_in_list "${mem}" "${mems_mem}") == "false" ]; then
iterate_again="1"
fi
fi
done
done
done
# Similar to what we just did for the config_registry, we now iterate
# through the kernel_registry and substitute any configuration families
# in the kernel list (right side of ':') with the members of that
# family's kernel set. This process continues iteratively, as before,
# until all families have been replaced with singleton configurations'
# kernel sets.
iterate_again="1"
while [ "${iterate_again}" == "1" ]; do
iterate_again="0"
#for config in "${!kernel_registry[@]}"; do
for kr_var in ${!kernel_registry_*}; do
config=${kr_var##kernel_registry_}
klist=$(query_array "kernel_registry" ${config})
# The entries that define singleton families should never need
# any substitution. In the kernel registry, we know it's a
# singleton entry when the cname occurs somewhere in the klist.
# (This is slightly different than the same test in the config
# registry, where we test that clist is one word and that
# clist == cname.)
if [ $(is_in_list "${config}" "${klist}") == "true" ]; then
#echo "debug: '${config}' not found in '${klist}'; skipping."
continue
fi
#for ker in ${kernel_registry[$config]}; do
#for ker in ${!kr_var}; do
for ker in ${klist}; do
#kers_ker="${kernel_registry[${ker}]}"
kers_ker=$(query_array "kernel_registry" ${ker})
# If kers_ker is empty string, then ker was not found as a key
# in the kernel registry. While not common, this can happen
# when ker identifies a kernel set that does not correspond to
# any configuration. (Example: armv7a and armv8a kernel sets are
# used by cortexa* configurations, but do not corresond to their
# own configurations.)
if [ "${kers_ker}" == "" ]; then
#echo "debug: ${ker} not found in kernel registry."
continue
fi
# If the current config/kernel (ker) differs from its singleton kernel
# entry (kers_ker), then that singleton entry was specified to use
# a different configuration's kernel set. Thus, we need to replace the
# occurrence in the current config/kernel name with that of the kernel
# set it needs.
if [ "${ker}" != "${kers_ker}" ]; then
#klisttmp="${kernel_registry[$config]}"
klisttmp=$(query_array "kernel_registry" ${config})
# Replace the current config with its requisite kernels,
# canonicalize whitespace, and then remove duplicate kernel
# set names, if they exist. Finally, update the kernel registry
# with the new kernel list.
# NOTE: WE must use substitute_words() rather than a simple sed
# expression because we need to avoid matching partial strings.
# For example, if klist above contains "foo bar barsk" and we use
# sed to substitute "bee boo" as the members of "bar", the
# result would (incorrectly) be "foo bee boo bee boosk",
# which would then get reduced, via rm_duplicate_words(), to
# "foo bee boo boosk".
#newklist=$(echo -e "${klisttmp}" | sed -e "s/${ker}/${kers_ker}/g")
newklist=$(substitute_words "${ker}" "${kers_ker}" "${klisttmp}")
newklist=$(canonicalize_ws "${newklist}")
newklist=$(rm_duplicate_words "${newklist}")
#kernel_registry[${config}]=${newklist}
#printf -v "kernel_registry_${config}" %s "${newklist}"
assign_key_value "kernel_registry" "${config}" "${newklist}"
# Since we performed a substitution and changed the kernel
# list, mark the iteration flag to continue another round,
# unless we just substituted using a singleton family
# definition, in which case we don't necessarily need to
# iterate further this round.
if [ $(is_in_list "${ker}" "${kers_ker}") == "false" ]; then
iterate_again="1"
fi
fi
done
done
done
}
substitute_words()
{
    # Replace every whole-word occurrence of a word in a whitespace-separated
    # list with a replacement string (which may itself be several words).
    #
    # Arguments:
    #   $1 - the word to replace (compared for exact equality, so "bar" does
    #        not match "barsk").
    #   $2 - the replacement word(s).
    #   $3 - the whitespace-separated list to process.
    # Outputs:
    #   The new list on stdout. Every item is preceded by a single space, so
    #   a non-empty result starts with a space; callers are expected to
    #   canonicalize whitespace themselves.
    local word new_words list newlist str

    word="$1"
    new_words="$2"
    list="$3"

    newlist=""
    for str in ${list}; do
        if [ "${str}" == "${word}" ]; then
            newlist="${newlist} ${new_words}"
        else
            newlist="${newlist} ${str}"
        fi
    done

    echo "${newlist}"
}
build_kconfig_registry()
{
    # Build the reverse ("kernel -> configs") registry for one family.
    #
    # For every sub-configuration listed under config_registry[$1], look up
    # its kernel sets in the kernel registry and append the sub-configuration
    # name to kconfig_registry[<kernel set>].
    #
    # Arguments:
    #   $1 - name of the configuration family to process.
    local family subconfig kset members updated

    family="$1"

    for subconfig in $(query_array "config_registry" ${family}); do

        # Walk the kernel sets required by this sub-configuration.
        for kset in $(query_array "kernel_registry" ${subconfig}); do

            # Fetch the sub-configs already recorded for this kernel set,
            # append the current one, and tidy up the whitespace.
            members=$(query_array "kconfig_registry" ${kset})
            updated=$(canonicalize_ws "${members} ${subconfig}")

            # Store the extended list back into the registry.
            assign_key_value "kconfig_registry" "${kset}" "${updated}"
        done
    done
}
is_in_list()
{
    # Echo "true" if word $1 appears as a whole item in the
    # whitespace-separated list $2, and "false" otherwise.
    local needle haystack entry

    needle="$1"
    haystack="$2"

    for entry in ${haystack}; do
        if [ "${entry}" == "${needle}" ]; then
            # First match settles it; no need to look further.
            echo "true"
            return
        fi
    done

    echo "false"
}
is_singleton()
{
    # Echo "true" if the whitespace-separated list $1 contains exactly one
    # item, and "false" otherwise (including for an empty list).
    local list

    list="$1"

    # Let the shell split the list into this function's positional
    # parameters so that the item count is simply $#.
    set -- ${list}

    if [ $# -eq 1 ]; then
        echo "true"
    else
        echo "false"
    fi
}
is_singleton_family()
{
    # Echo "true" if the family is a singleton family, i.e. its member list
    # consists of exactly one item and that item is the family's own name.
    # Echo "false" otherwise.
    #
    # Arguments:
    #   $1 - the family (configuration) name.
    #   $2 - the whitespace-separated member list registered for it.
    local familyname memberlist rval

    familyname="$1"
    memberlist="$2"

    rval="false"

    # NOTE: is_singleton() always prints either "true" or "false", so its
    # output must be compared against "true"; merely testing it for being
    # non-empty would always succeed.
    if [ "$(is_singleton "${memberlist}")" == "true" ]; then
        if [ "${memberlist}" == "${familyname}" ]; then
            rval="true"
        fi
    fi

    echo "${rval}"
}
remove_from_list()
{
    # Filter a whitespace-separated list, dropping every item that matches
    # one of the given strike words.
    #
    # Arguments:
    #   $1 - whitespace-separated words to remove.
    #   $2 - whitespace-separated list to filter.
    # Outputs:
    #   The filtered list, whitespace-canonicalized, on stdout.
    local strike_words list flist item

    strike_words="$1"
    list="$2"

    flist=""
    for item in ${list}; do
        # Keep only the items that match none of the strike words.
        if [ "$(is_in_list "${item}" "${strike_words}")" == "false" ]; then
            flist="${flist} ${item}"
        fi
    done

    flist=$(canonicalize_ws "${flist}")

    # Return the filtered list.
    echo "${flist}"
}
canonicalize_ws()
{
    # Echo $1 with leading and trailing whitespace stripped and with runs of
    # spaces between words collapsed to a single space.
    # NOTE: the string is passed through 'echo -e' twice, so backslash
    # escapes in the input are interpreted.
    local trimmed squeezed

    # Strip whitespace at both ends of the line.
    trimmed=$(echo -e "$1" | sed 's/^[[:space:]]*//; s/[[:space:]]*$//')

    # Squeeze repeated spaces between words.
    squeezed=$(echo -e "${trimmed}" | tr -s ' ')

    echo "${squeezed}"
}
rm_duplicate_words_simple()
{
    # Echo the whitespace-separated list $1 with duplicate words removed,
    # keeping the first occurrence of each word. Every kept word is followed
    # by a field separator, so a non-empty result ends with a space.
    local words unique

    words="$1"

    unique=$(echo "${words}" | awk '{
        for (i = 1; i <= NF; i++) {
            if (!seen[$i]++) {
                printf("%s%s", $i, FS)
            }
        }
        printf("\n")
    }')

    echo "${unique}"
}
rm_duplicate_words()
{
    # Echo the whitespace-separated list $1 with duplicate words removed,
    # keeping the LAST occurrence of each word (rather than the first).
    # Keeping the last instance works well for the kinds of duplicates that
    # show up in the config and kernel registries; for example, it keeps
    # only the final 'generic' in a configuration family that lists it
    # twice or more.
    local words deduped

    words="$1"

    # Scan each line right-to-left so that the first sighting of a word is
    # its last occurrence, remember the survivors, then print them back in
    # their original left-to-right order separated by single spaces.
    deduped=$(echo "${words}" | awk '{
        n = 0
        for (i = NF; i >= 1; i--) {
            if (!seen[$i]++) {
                kept[++n] = $i
            }
        }
        for (j = n; j >= 1; j--) {
            printf("%s%s", kept[j], (j > 1 ? " " : ""))
        }
        printf("\n")
    }')

    echo "${deduped}"
}
get_cc_search_list()
{
    # Echo the ordered list of C compiler names to probe, chosen according
    # to the global ${os_name}.
    case "${os_name}" in
        OpenBSD|FreeBSD)
            # On OpenBSD and FreeBSD, prefer cc and clang over gcc.
            echo "cc clang gcc"
            ;;
        *)
            # On Linux, Darwin (OS X), and generic OSes, prefer gcc.
            echo "gcc clang cc"
            ;;
    esac
}
get_cxx_search_list()
{
    # Echo the ordered list of C++ compiler names to probe, chosen according
    # to the global ${os_name}.
    case "${os_name}" in
        OpenBSD|FreeBSD)
            # On OpenBSD and FreeBSD, prefer c++ and clang++ over g++.
            echo "c++ clang++ g++"
            ;;
        *)
            # On Linux, Darwin (OS X), and generic OSes, prefer g++.
            echo "g++ clang++ c++"
            ;;
    esac
}
select_tool()
{
    # Echo the first tool from a candidate list that responds successfully
    # to '--version', or an empty string if none does.
    #
    # Arguments:
    #   $1 - whitespace-separated default candidates, in order of preference.
    #   $2 - value of the associated environment variable (e.g. CC, CXX,
    #        PYTHON); if non-empty it is tried before the defaults.
    local candidates preferred tool

    candidates=$1
    preferred=$2

    # A user-provided choice takes priority over the default candidates.
    if [ -n "${preferred}" ]; then
        candidates="${preferred} ${candidates}"
    fi

    for tool in ${candidates}; do
        # A tool is considered usable if '--version' exits successfully.
        if ${tool} --version > /dev/null 2>&1; then
            echo "${tool}"
            return
        fi
    done

    # Nothing worked: report an empty selection.
    echo ""
}
auto_detect()
{
# Compile and run a small configure-time program and echo whatever it
# prints (the detected sub-configuration name).
#
# Globals read: found_cc, dist_path, is_win, cc_vendor, LIBPTHREAD,
#   enable_system, rename_blis_arch_type, rename_blis_model_type,
#   debug_auto_detect.
# Outputs: the stdout of the auto-detect executable; or, when
#   debug_auto_detect is anything other than "no", the compile command
#   itself (nothing is compiled or run in that case).
# NOTE(review): only the variables named in the 'local' statement below are
#   local; every other variable assigned here (c_src_pairs, c_hdr_paths,
#   ldflags, autodetect_x, ...) is set in the caller's/global scope.
local cc cflags config_defines detected_config rval cmd
# Use the same compiler that was found earlier.
cc="${found_cc}"
# For debugging: reveal what compiler was chosen for auto-detection.
#touch "${cc}.txt"
# Tweak the flags we use based on the compiler. This is mostly just
# an opportunity to turn off annoying warnings that some compilers
# may throw off.
# NOTE: this compares the compiler command name itself (not cc_vendor),
# so it only matches when the command is literally "clang".
if [ "${cc}" == "clang" ]; then
cflags="-Wno-tautological-compare"
else
cflags=
fi
# Accumulate a list of source files we'll need to compile along with
# the top-level (root) directory in which they are located. Each entry
# has the form "<rootdir>:<filename>".
c_src_pairs=""
c_src_pairs="${c_src_pairs} frame:bli_arch.c"
c_src_pairs="${c_src_pairs} frame:bli_cpuid.c"
c_src_pairs="${c_src_pairs} frame:bli_env.c"
c_src_pairs="${c_src_pairs} build:config_detect.c"
# Accumulate a list of full filepaths to the source files listed above.
c_src_filepaths=""
for pair in ${c_src_pairs}; do
filename=${pair#*:}
rootdir=${pair%:*}
filepath=$(find ${dist_path}/${rootdir} -name "${filename}")
c_src_filepaths="${c_src_filepaths} ${filepath}"
done
# Accumulate a list of header files we'll need to locate along with
# the top-level (root) directory in which they are located.
c_hdr_pairs=""
c_hdr_pairs="${c_hdr_pairs} frame:bli_system.h"
c_hdr_pairs="${c_hdr_pairs} frame:bli_type_defs.h"
c_hdr_pairs="${c_hdr_pairs} frame:bli_arch.h"
c_hdr_pairs="${c_hdr_pairs} frame:bli_cpuid.h"
c_hdr_pairs="${c_hdr_pairs} frame:bli_env.h"
# NOTE: These headers are needed by bli_type_defs.h.
c_hdr_pairs="${c_hdr_pairs} frame:bli_malloc.h"
c_hdr_pairs="${c_hdr_pairs} frame:bli_pthread.h"
# Accumulate a list of full paths to the header files listed above.
# While we are at it, we include the "-I" compiler option to indicate
# adding the path to the list of directories to search when encountering
# #include directives.
c_hdr_paths=""
for pair in ${c_hdr_pairs}; do
filename=${pair#*:}
rootdir=${pair%:*}
filepath=$(find ${dist_path}/${rootdir} -name "${filename}")
# Strip the filename to obtain the directory that contains the header.
path=${filepath%/*}
c_hdr_paths="${c_hdr_paths} -I${path}"
done
# Define the executable name.
autodetect_x="auto-detect.x"
# Create #defines for all of the BLIS_CONFIG_ macros in bli_cpuid.c.
# (Every line mentioning BLIS_CONFIG_ is taken, and "#ifdef " is rewritten
# to "-D" so that the line becomes a compiler define option.)
bli_cpuid_c_filepath=$(find ${dist_path}/frame -name "bli_cpuid.c")
config_defines=$(grep BLIS_CONFIG_ ${bli_cpuid_c_filepath} \
| sed -e 's/#ifdef /-D/g')
# Set the linker flags. We typically need pthreads (or BLIS's homerolled
# equivalent) because it is needed for parts of bli_arch.c unrelated to
# bli_arch_string(), which is called by the main() function of the
# detection program (presumably build/config_detect.c -- TODO confirm).
# ${LIBPTHREAD--lpthread} expands to LIBPTHREAD if it is set (even to an
# empty string) and to "-lpthread" otherwise.
if [[ "$is_win" == "no" || "$cc_vendor" != "clang" ]]; then
ldflags="${LIBPTHREAD--lpthread}"
fi
# However, if --disable-system was given, we override the choice made above
# and do not use any pthread link flags.
if [[ "$enable_system" == "no" ]]; then
ldflags=
fi
# Compile the auto-detect program using source code inside the
# framework.
# NOTE: -D_GNU_SOURCE is needed to enable POSIX extensions to
# pthreads (i.e., barriers).
# The two variables below hold the character sequences "\" and \"", so
# that after eval re-parses the command the compiler receives the
# -D__blis_*_type_name values wrapped in literal double quotes.
double_quote_open=\"\\\"
double_quote_close=\\\"\"
cmd="${cc} ${config_defines} \
-DBLIS_CONFIGURETIME_CPUID \
-D__blis_arch_type_name=${double_quote_open}${rename_blis_arch_type}${double_quote_close} \
-D__blis_model_type_name=${double_quote_open}${rename_blis_model_type}${double_quote_close} \
${c_hdr_paths} \
-std=c99 -D_GNU_SOURCE \
${cflags} \
${c_src_filepaths} \
${ldflags} \
-o ${autodetect_x}"
if [ "${debug_auto_detect}" == "no" ]; then
# Execute the compilation command.
eval ${cmd}
else
# Debugging stuff. Instead of executing ${cmd}, join the lines together
# with tr and trim excess whitespace via awk.
cmd=$(echo "${cmd}" | tr '\n' ' ' | awk '{$1=$1;print}')
echo "${cmd}"
return
fi
# Run the auto-detect program.
detected_config=$(./${autodetect_x})
# Remove the executable file.
rm -f ./${autodetect_x}
# Return the detected sub-configuration name.
echo "${detected_config}"
}
has_libmemkind()
{
    # Echo 'yes' if a small test program can be compiled and linked against
    # libmemkind using ${found_cc}, and 'no' otherwise.
    local probe_src probe_path link_flags probe_bin answer

    # Locate the libmemkind detection source file under the build directory.
    probe_src="libmemkind_detect.c"
    probe_path=$(find ${dist_path}/build -name "${probe_src}")

    # Link against libmemkind in addition to the user's LDFLAGS.
    link_flags="${LDFLAGS} -lmemkind"

    # Name of the throwaway executable.
    probe_bin="libmemkind-detect.x"

    # The compiler's exit status tells us whether libmemkind is usable.
    if ${found_cc} -o ${probe_bin} ${probe_path} ${link_flags} 2> /dev/null; then
        answer='yes'
    else
        answer='no'
    fi

    # Clean up the executable, if one was produced.
    rm -f ./${probe_bin}

    echo "${answer}"
}
has_pragma_omp_simd()
{
    # Echo 'yes' if ${found_cc} can compile a small test program that
    # contains a '#pragma omp simd' (using -fopenmp-simd), and 'no'
    # otherwise.
    local probe_src probe_path probe_bin answer

    # Locate the omp-simd detection source file under the build directory.
    probe_src="omp_simd_detect.c"
    probe_path=$(find ${dist_path}/build -name "${probe_src}")

    # Name of the throwaway executable.
    probe_bin="omp_simd-detect.x"

    # The compiler's exit status tells us whether the pragma is supported.
    if ${found_cc} -std=c99 -O3 -march=native -fopenmp-simd \
           -o ${probe_bin} ${probe_path} 2> /dev/null; then
        answer='yes'
    else
        answer='no'
    fi

    # Clean up the executable, if one was produced.
    rm -f ./${probe_bin}

    echo "${answer}"
}
echoerr()
{
    # Print "<script_name>: error: <message>" where <message> is all of the
    # arguments joined by spaces.
    # NOTE: the message is written to stdout; the redirect to stderr has
    # been left disabled, as in the original.
    # The script name is passed as a printf argument rather than being
    # embedded in the format string, so a '%' or '\' in it is printed
    # literally.
    printf '%s: error: %s\n' "${script_name}" "$*" #>&2;
}
echowarn()
{
    # Print "<script_name>: warning: <message>" where <message> is all of
    # the arguments joined by spaces.
    # NOTE: the message is written to stdout; the redirect to stderr has
    # been left disabled, as in the original.
    # The script name is passed as a printf argument rather than being
    # embedded in the format string, so a '%' or '\' in it is printed
    # literally.
    printf '%s: warning: %s\n' "${script_name}" "$*" #>&2;
}
blacklistcc_add()
{
    # Add sub-configuration $1 to the global blacklist (config_blist)
    # because the compiler does not support it, and emit a warning.

    # Nothing to do if it is already blacklisted; this also avoids
    # printing redundant warnings.
    [ $(is_in_list "$1" "${config_blist}") == "false" ] || return 0

    echowarn "${cc_vendor} ${cc_version} does not support '$1'; adding to blacklist."
    config_blist="${config_blist} $1"
}
blacklistbu_add()
{
    # Add sub-configuration $1 to the global blacklist (config_blist)
    # because the assembler does not support it, and emit a warning.

    # Nothing to do if it is already blacklisted; this also avoids
    # printing redundant warnings.
    [ $(is_in_list "$1" "${config_blist}") == "false" ] || return 0

    echowarn "assembler ('as' ${bu_version}) does not support '$1'; adding to blacklist."
    config_blist="${config_blist} $1"
}
blacklist_init()
{
    # Start with an empty global blacklist of sub-configurations.
    config_blist=''
}
blacklist_cleanup()
{
    # Normalize the global blacklist: drop duplicate entries first, then
    # tidy up the whitespace of the result.
    config_blist=$(canonicalize_ws "$(rm_duplicate_words "${config_blist}")")
}
echoerr_unsupportedcc()
{
    # Report that the detected compiler version is unsupported and abort
    # the script with exit status 1.
    local complaint

    complaint="${script_name}: *** Unsupported compiler version: ${cc_vendor} ${cc_version}."
    echoerr "${complaint}"

    exit 1
}
echoerr_unsupportedpython()
{
    # Report that the detected python version is unsupported and abort the
    # script with exit status 1.
    local complaint

    complaint="${script_name}: *** Unsupported python version: ${python_version}."
    echoerr "${complaint}"

    exit 1
}
get_binutils_version()
{
    # Query the assembler (${AS}, defaulting to 'as') for its version and
    # store the result in the globals bu_version, bu_major, bu_minor and
    # bu_revision. A one-line summary is printed to stdout.
    #
    # NOTE: binutil and bu_string are also left as globals, as in the
    # original; other parts of the script (not visible here) may read them.
    binutil=${AS:-as}

    # Query the full binutils version string output. This includes the
    # version string along with (potentially) a bunch of other textual
    # clutter.
    if [ "$(uname -s)" == "Darwin" ]; then
        # The default OS X assembler uses a trifecta of brain-dead
        # conventions: responding only to '-v', hanging indefinitely if
        # not given an argument, and outputting the result to stderr.
        # (And if you still weren't convinced, it creates an 'a.out'
        # by default. So yeah.)
        bu_string=$(${binutil} -v /dev/null -o /dev/null 2>&1)
    else
        bu_string=$(${binutil} --version 2>/dev/null)
    fi

    # Query the binutils version number.
    # 'grep -E' is used instead of 'egrep', which is obsolescent and makes
    # recent GNU grep releases print a warning on every invocation.
    # The last part ({ read first rest ; echo $first ; }) keeps only the
    # first match when several version-like strings are present.
    bu_version=$(echo "${bu_string}" | grep -E -o '[0-9]+\.[0-9]+\.?[0-9]*' | { read first rest ; echo ${first} ; })

    # Parse the version number into its major, minor, and revision
    # components.
    bu_major=$(echo "${bu_version}" | cut -d. -f1)
    bu_minor=$(echo "${bu_version}" | cut -d. -f2)
    bu_revision=$(echo "${bu_version}" | cut -d. -f3)

    echo "${script_name}: found assembler ('as') version ${bu_version} (maj: ${bu_major}, min: ${bu_minor}, rev: ${bu_revision})."
}
get_python_search_list()
{
    # Echo the ordered list of python interpreter names to probe. The
    # unversioned 'python' is tried first on all OSes.
    echo "python python3 python2"
}
get_python_version()
{
    # Query ${found_python} for its version and store the result in the
    # globals python_version, python_major, python_minor and
    # python_revision. A one-line summary is printed to stdout.
    local interp banner part idx

    interp="${found_python}"

    # Capture the version banner (e.g. "Python 3.x.y"). stderr is merged
    # into stdout because python may print its version info to stderr.
    banner="$(${interp} --version 2>&1)"

    # Drop any preceding text and keep only the first number and whatever
    # follows it.
    python_version=$(echo "${banner}" | sed -e "s/[a-zA-Z_ ]* \([0-9]*\..*\)/\1/g")

    # Split the version on '.' and store fields 1, 2 and 3 in
    # python_major, python_minor and python_revision, respectively.
    idx=0
    for part in major minor revision; do
        idx=$((idx + 1))
        printf -v "python_${part}" '%s' "$(echo "${python_version}" | cut -d. -f${idx})"
    done

    echo "${script_name}: found python version ${python_version} (maj: ${python_major}, min: ${python_minor}, rev: ${python_revision})."
}
check_python()
{
    # Verify that the python version recorded in python_major/python_minor
    # is supported; call echoerr_unsupportedpython otherwise.
    #
    # Python requirements
    #
    #   python1: no versions supported
    #   python2: 2.7+
    #   python3: 3.4+
    #
    # NOTE: It's actually unclear whether python 3.0 through 3.3.x would work.
    # Python 3.5 is the oldest python3 that I have available to test with, and
    # I only know that 3.4 will work thanks to feedback from Dave Love. So it's
    # quite possible that some of those "unsupported" python3 versions are
    # sufficient. -FGVZ
    local python

    python="${found_python}"

    if [ ${python_major} -eq 1 ]; then
        # Python 1.x is unsupported.
        echoerr_unsupportedpython
    elif [ ${python_major} -eq 2 ] && [ ${python_minor} -lt 7 ]; then
        # Python 2.6.x or older is unsupported.
        echoerr_unsupportedpython
    elif [ ${python_major} -eq 3 ] && [ ${python_minor} -lt 4 ]; then
        # Python 3.3.x or older is unsupported.
        echoerr_unsupportedpython
    fi

    echo "${script_name}: python ${python_version} appears to be supported."
}
get_compiler_version()
{
    # Query ${found_cc} for its vendor and version and store the result in
    # the globals cc_vendor, cc_version, cc_major, cc_minor and cc_revision.
    # A one-line summary is printed to stdout.
    local cc vendor_string

    cc="${found_cc}"

    # Query the full vendor version string output. This includes the
    # version number along with (potentially) a bunch of other textual
    # clutter.
    # NOTE: This maybe should use merged stdout/stderr rather than only
    # stdout. But it works for now.
    vendor_string="$(${cc} --version 2>/dev/null)"

    # Query the compiler "vendor" (ie: the compiler's simple name) and
    # isolate the version number.
    # 'grep -E' is used instead of 'egrep', which is obsolescent and makes
    # recent GNU grep releases print a warning on every invocation.
    # The last part ({ read first rest ; echo $first ; }) keeps only the
    # first match when several are present.
    cc_vendor=$(echo "${vendor_string}" | grep -E -o 'icc|gcc|clang|emcc|pnacl|IBM|oneAPI|crosstool-NG' | { read first rest ; echo $first ; })

    if [ "${cc_vendor}" = "crosstool-NG" ]; then
        # Treat compilers built by crosstool-NG (for eg: conda) as gcc.
        cc_vendor="gcc"
    fi

    if [ "${cc_vendor}" = "icc" ] || [ "${cc_vendor}" = "gcc" ]; then
        cc_version=$(${cc} -dumpversion)
    elif [ "${cc_vendor}" = "clang" ]; then
        # If compiler is AOCC, first grep for clang and then the version number.
        cc_version=$(echo "${vendor_string}" | grep -E -o '(clang|LLVM) version [0-9]+\.[0-9]+\.?[0-9]*' | grep -E -o '[0-9]+\.[0-9]+\.?[0-9]*')
    elif [ "${cc_vendor}" = "oneAPI" ]; then
        # Treat Intel oneAPI's clang as clang, not icc.
        cc_vendor="clang"
        cc_version=$(echo "${vendor_string}" | grep -E -o '[0-9]+\.[0-9]+\.[0-9]+\.?[0-9]*' | { read first rest ; echo ${first} ; })
    else
        cc_version=$(echo "${vendor_string}" | grep -E -o '[0-9]+\.[0-9]+\.?[0-9]*' | { read first rest ; echo ${first} ; })
    fi

    # Parse the version number into its major, minor, and revision
    # components.
    cc_major=$(echo "${cc_version}" | cut -d. -f1)
    cc_minor=$(echo "${cc_version}" | cut -d. -f2)
    cc_revision=$(echo "${cc_version}" | cut -d. -f3)

    # gcc 7 introduced new behavior to -dumpversion whereby only the major
    # version component is output. However, as part of this change, gcc 7
    # also introduced a new option, -dumpfullversion, which is guaranteed to
    # always output the major, minor, and revision numbers. Thus, if we're
    # using gcc and its version is 7 or later, we re-query and re-parse the
    # version string.
    # The vendor test is evaluated first and short-circuits, so the numeric
    # comparison is only attempted for gcc; this avoids a spurious test
    # error for other compilers whose version could not be parsed.
    if [ "${cc_vendor}" = "gcc" ] && [ "${cc_major}" -ge 7 ]; then

        # Re-query the version number using -dumpfullversion.
        cc_version=$(${cc} -dumpfullversion)

        # And parse the result.
        cc_major=$(echo "${cc_version}" | cut -d. -f1)
        cc_minor=$(echo "${cc_version}" | cut -d. -f2)
        cc_revision=$(echo "${cc_version}" | cut -d. -f3)
    fi

    echo "${script_name}: found ${cc_vendor} version ${cc_version} (maj: ${cc_major}, min: ${cc_minor}, rev: ${cc_revision})."
}
check_compiler()
{
    # Blacklist the sub-configurations that the detected compiler cannot
    # build, and abort (via echoerr_unsupportedcc) if the compiler is too
    # old to be supported at all.
    #
    # Globals read: found_cc, cc_vendor, cc_version, cc_major, cc_minor,
    #   cc_revision, and vendor_string (if the caller happens to provide it).
    local cc vendor_info

    cc="${found_cc}"

    #
    # Compiler requirements
    #
    # General:
    #
    #   icc 15+, gcc 4.7+, clang 3.3+
    #
    # Specific:
    #
    #   skx:         icc 15.0.1+, gcc 6.0+, clang 3.9+
    #   knl:         icc 14.0.1+, gcc 5.0-14, clang 3.9+
    #   haswell:     any
    #   sandybridge: any
    #   penryn:      any
    #
    #   zen:         gcc 6.0+[1], clang 4.0+
    #   excavator:   gcc 4.9+, clang 3.5+
    #   steamroller: any
    #   piledriver:  any
    #   bulldozer:   any
    #
    #   cortexa57:   any
    #   cortexa15:   any
    #   cortexa9:    any
    #
    #   armsve:      clang11+, gcc10+
    #
    #   generic:     any
    #
    # Note: These compiler requirements were originally modeled after similar
    # requirements encoded into TBLIS's configure.ac [2].
    #
    # [1] While gcc 6.0 or newer is needed for zen support (-march=znver1),
    #     we relax this compiler version constraint a bit by targeting bdver4
    #     and then disabling the instruction sets that were removed in the
    #     transition from bdver4 to znver1. (See config/zen/make_defs.mk for
    #     the specific compiler flags used.)
    # [2] https://github.com/devinamatthews/tblis/
    #

    echo "${script_name}: checking for blacklisted configurations due to ${cc} ${cc_version}."

    # Fixme: check on a64fx, neoverse, and others

    # gcc
    if [ "x${cc_vendor}" = "xgcc" ]; then

        if [ ${cc_major} -lt 4 ]; then
            echoerr_unsupportedcc
        fi
        if [ ${cc_major} -eq 4 ]; then
            blacklistcc_add "knl"
            if [ ${cc_minor} -lt 7 ]; then
                echoerr_unsupportedcc
            fi
            if [ ${cc_minor} -lt 9 ]; then
                blacklistcc_add "excavator"
                blacklistcc_add "zen"
            fi
        fi
        if [[ ${cc_major} -lt 5 ]] || [[ ${cc_major} -gt 14 ]]; then
            blacklistcc_add "knl"
        fi
        if [ ${cc_major} -lt 6 ]; then
            # Normally, zen would be blacklisted for gcc prior to 6.0.
            # However, we have a workaround in place in the zen
            # configuration's make_defs.mk file that starts with bdver4
            # and disables the instructions that were removed in znver1.
            # Thus, this "blacklistcc_add" statement has been moved above.
            #blacklistcc_add "zen"
            blacklistcc_add "skx"
            # gcc 5.x may support POWER9 but it is unverified.
            blacklistcc_add "power9"
        fi
        if [ ${cc_major} -lt 10 ]; then
            blacklistcc_add "armsve"
        fi
    fi

    # icc
    if [ "x${cc_vendor}" = "xicc" ]; then

        if [ ${cc_major} -lt 15 ]; then
            echoerr_unsupportedcc
        fi
        if [ ${cc_major} -eq 15 ]; then
            if [ ${cc_revision} -lt 1 ]; then
                blacklistcc_add "skx"
            fi
        fi
        if [ ${cc_major} -eq 18 ]; then
            echo "${script_name}: ${cc} ${cc_version} is known to cause erroneous results. See https://github.com/flame/blis/issues/371 for details."
            blacklistcc_add "knl"
            blacklistcc_add "skx"
        fi
        if [ ${cc_major} -ge 19 ]; then
            echo "${script_name}: ${cc} ${cc_version} is known to cause erroneous results. See https://github.com/flame/blis/issues/371 for details."
            echoerr_unsupportedcc
        fi
    fi

    # clang
    if [ "x${cc_vendor}" = "xclang" ]; then

        # Apple's clang uses its own version numbering, so we need the
        # compiler's version banner to tell the two apart. The banner
        # captured by get_compiler_version() is local to that function and
        # therefore not visible here, so unless a vendor_string is
        # available in this scope we query the compiler again.
        vendor_info="${vendor_string}"
        if [ -z "${vendor_info}" ]; then
            vendor_info="$(${cc} --version 2>/dev/null)"
        fi

        if [ "$(echo ${vendor_info} | grep -o Apple)" = "Apple" ]; then
            if [ ${cc_major} -lt 5 ]; then
                echoerr_unsupportedcc
            fi
            # See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions
            if [ ${cc_major} -eq 5 ]; then
                # Apple clang 5.0 is clang 3.4svn
                blacklistcc_add "excavator"
                blacklistcc_add "zen"
            fi
            if [ ${cc_major} -lt 7 ]; then
                blacklistcc_add "knl"
                blacklistcc_add "skx"
            fi
        else
            if [ ${cc_major} -lt 3 ]; then
                echoerr_unsupportedcc
            fi
            if [ ${cc_major} -eq 3 ]; then
                if [ ${cc_minor} -lt 3 ]; then
                    echoerr_unsupportedcc
                fi
                if [ ${cc_minor} -lt 5 ]; then
                    blacklistcc_add "excavator"
                    blacklistcc_add "zen"
                fi
                if [ ${cc_minor} -lt 9 ]; then
                    blacklistcc_add "knl"
                    blacklistcc_add "skx"
                fi
            fi
            if [ ${cc_major} -lt 4 ]; then
                # See comment above regarding zen support.
                #blacklistcc_add "zen"
                : # explicit no-op since bash can't handle empty bodies.
            fi
            if [ ${cc_major} -lt 11 ]; then
                blacklistcc_add "armsve"
            fi
        fi
    fi
}
check_compiler_version_ranges()
{
    # Classify the detected compiler against version ranges that are known
    # to matter to the BLIS build system, recording each result as a
    # 'yes'/'no' flag in a global variable.
    #
    # range:    gcc < 4.9.0 (ie: 4.8.5 or older)
    # variable: gcc_older_than_4_9_0
    # comments:
    #   These older versions of gcc may support microarchitectures such as
    #   sandybridge, but the '-march=' flag uses a different label syntax.
    #   Newer versions prefer '-march=sandybridge' [1], whereas older
    #   versions spell the same option '-march=corei7-avx' [2].
    #
    #   [1] https://gcc.gnu.org/onlinedocs/gcc-4.9.0/gcc/i386-and-x86-64-Options.html#i386-and-x86-64-Options
    #   [2] https://gcc.gnu.org/onlinedocs/gcc-4.8.5/gcc/i386-and-x86-64-Options.html#i386-and-x86-64-Options
    #
    # range:    gcc < 6.1 (ie: 5.5 or older)
    # variable: gcc_older_than_6_1_0
    # comments:
    #   These older versions of gcc do not explicitly support the Zen (Zen1)
    #   microarchitecture; the newest value they understand is
    #   '-march=bdver4' [3]. Basic support can still be attained by starting
    #   from '-march=bdver4' and disabling the instruction sets that were
    #   removed in the transition from Excavator to Zen, namely: FMA4, TBM,
    #   XOP, and LWP. Newer versions of gcc support Zen via '-march=znver1'
    #   [4].
    #
    #   [3] https://gcc.gnu.org/onlinedocs/gcc-5.5.0/gcc/x86-Options.html#x86-Options
    #   [4] https://gcc.gnu.org/onlinedocs/gcc-6.1.0/gcc/x86-Options.html#x86-Options
    #
    # range:    gcc < 9.1 (ie: 8.3 or older)
    # variable: gcc_older_than_9_1_0
    # comments:
    #   These older versions of gcc do not explicitly support the Zen2
    #   microarchitecture. If gcc is 6.1 or newer, '-march=znver1' [5] may be
    #   used (the instruction sets it enables are a subset of those enabled
    #   by '-march=znver2'); otherwise '-march=bdver4' must be used together
    #   with the workaround described above for gcc_older_than_6_1_0. Newer
    #   versions of gcc support Zen2 via '-march=znver2' [6].
    #
    #   [5] https://gcc.gnu.org/onlinedocs/gcc-8.3.0/gcc/x86-Options.html#x86-Options
    #   [6] https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/x86-Options.html#x86-Options
    #
    # range:    gcc < 11.2.0 (ie: 11.1 or older)
    # variable: gcc_older_than_11_2_0
    #
    local cc

    cc="${found_cc}"

    # Assume a sufficiently new compiler until shown otherwise.
    gcc_older_than_4_9_0='no'
    gcc_older_than_6_1_0='no'
    gcc_older_than_9_1_0='no'
    gcc_older_than_11_2_0='no'

    echo "${script_name}: checking ${cc} ${cc_version} against known consequential version ranges."

    # Only gcc currently has version ranges of interest; there is nothing
    # to check for icc or clang (or any other vendor).
    if [ "x${cc_vendor}" != "xgcc" ]; then
        return 0
    fi

    # gcc < 4.9.0 (ie: 4.8.5 or older).
    if [ ${cc_major} -eq 4 ] && [ ${cc_minor} -lt 9 ]; then
        echo "${script_name}: note: found ${cc} version older than 4.9.0."
        gcc_older_than_4_9_0='yes'
    fi

    # gcc < 6.1.0 (ie: 5.5 or older).
    if [ ${cc_major} -lt 6 ]; then
        echo "${script_name}: note: found ${cc} version older than 6.1."
        gcc_older_than_6_1_0='yes'
    fi

    # gcc < 9.1.0 (ie: 8.3 or older).
    if [ ${cc_major} -lt 9 ]; then
        echo "${script_name}: note: found ${cc} version older than 9.1."
        gcc_older_than_9_1_0='yes'
    fi

    # gcc < 11.2.0 (ie: 11.1 or older): any major version below 11, or
    # 11.x with a minor version below 2.
    if [ ${cc_major} -lt 11 ] || { [ ${cc_major} -eq 11 ] && [ ${cc_minor} -lt 2 ]; }; then
        echo "${script_name}: note: found ${cc} version older than 11.2.0."
        gcc_older_than_11_2_0='yes'
    fi
}
check_assembler()
{
    # Probe the assembler (through ${found_cc}) with small test files for
    # several instruction-set extensions and blacklist the
    # sub-configurations that depend on any extension it rejects.
    #
    # The probe results are recorded in the globals knows_fma4, knows_avx,
    # knows_fma3, knows_avx512f and knows_avx512dq ('yes' or 'no').
    local cc asm_dir cflags asm_fp subconfig

    cc="${found_cc}"

    # Directory that holds the assembly test files.
    asm_dir="${dist_path}/build"

    # Most probes need no additional compiler flags.
    cflags=""

    echo "${script_name}: checking for blacklisted configurations due to as ${bu_version}."

    # FMA4 (amd: bulldozer).
    asm_fp=$(find ${asm_dir} -name "fma4.s")
    knows_fma4=$(try_assemble "${cc}" "${cflags}" "${asm_fp}")
    if [ "${knows_fma4}" == "no" ]; then
        blacklistbu_add "bulldozer"
    fi

    # AVX (intel: sandybridge+, amd: piledriver+).
    asm_fp=$(find ${asm_dir} -name "avx.s")
    knows_avx=$(try_assemble "${cc}" "${cflags}" "${asm_fp}")
    if [ "${knows_avx}" == "no" ]; then
        blacklistbu_add "sandybridge"
    fi

    # FMA3 (intel: haswell+, amd: piledriver+).
    asm_fp=$(find ${asm_dir} -name "fma3.s")
    knows_fma3=$(try_assemble "${cc}" "${cflags}" "${asm_fp}")
    if [ "${knows_fma3}" == "no" ]; then
        for subconfig in haswell piledriver steamroller excavator skx; do
            blacklistbu_add "${subconfig}"
        done
    fi

    # AVX-512f (knl, skx).
    # The assembler on OS X won't recognize AVX-512 without help.
    if [ "${cc_vendor}" == "clang" ]; then
        cflags="-march=knl"
    fi
    asm_fp=$(find ${asm_dir} -name "avx512f.s")
    knows_avx512f=$(try_assemble "${cc}" "${cflags}" "${asm_fp}")
    if [ "${knows_avx512f}" == "no" ]; then
        for subconfig in knl skx; do
            blacklistbu_add "${subconfig}"
        done
    fi

    # AVX-512dq (skx).
    # The assembler on OS X won't recognize AVX-512 without help.
    if [ "${cc_vendor}" == "clang" ]; then
        cflags="-march=skylake-avx512"
    fi
    asm_fp=$(find ${asm_dir} -name "avx512dq.s")
    knows_avx512dq=$(try_assemble "${cc}" "${cflags}" "${asm_fp}")
    if [ "${knows_avx512dq}" == "no" ]; then
        blacklistbu_add "skx"
    fi
}
try_assemble()
{
    # Echo 'yes' if the given compiler can assemble the given source file
    # into an object file, and 'no' otherwise.
    #
    # Arguments:
    #   $1 - compiler command.
    #   $2 - extra compiler flags (may be empty).
    #   $3 - path to the assembly source file.
    local cc cflags asm_src obj_name verdict

    cc="$1"
    cflags="$2"
    asm_src="$3"

    # Derive the object filename from the source path: strip the directory
    # part, then the file extension, and append ".o".
    obj_name=${asm_src##*/}
    obj_name="${obj_name%.*}.o"

    # The compiler's exit status tells us whether assembling succeeded.
    if ${cc} ${cflags} -c ${asm_src} -o ${obj_name} > /dev/null 2>&1; then
        verdict='yes'
    else
        verdict='no'
    fi

    # Remove the object file, if one was produced.
    rm -f "${obj_name}"

    echo "${verdict}"
}
set_default_version()
{
    # Set the global 'version' string from the contents of the version file.
    #
    # Arguments:
    #   $1 - path to the version file.
    # NOTE: this also overwrites the global 'version_file' with $1.
    local release stamp

    version_file=$1

    echo "${script_name}: determining default version string."

    # Use what's in the version file as-is, and append today's date as the
    # build stamp.
    release=$(cat "${version_file}")
    stamp=$(date +%Y%m%d)

    version="AOCL-BLAS ${release} Build ${stamp}"
}
#
# -- main function -------------------------------------------------------------
#
main()
{
#declare -A config_registry
#declare -A kernel_registry
#declare -A kconfig_registry
# -- Basic names and paths --
# The name of the script, stripped of any preceding path.
script_name=${0##*/}
# The path to the script. We need this to find the top-level directory
# of the source distribution in the event that the user has chosen to
# build elsewhere.
dist_path=${0%/${script_name}}
# The path to the directory in which we are building. We do this to
# make explicit that we distinguish between the top-level directory
# of the distribution and the directory in which we are building.
cur_dirpath="."
# The file in which the version string is kept.
version_file="version"
version_filepath="${dist_path}/${version_file}"
# The name of and path to the directory named "build" in the top-level
# directory of the source distribution.
build_dir='build'
build_dirpath="${dist_path}/${build_dir}"
# The name/path to the registry (master list) of supported configurations.
registry_file="config_registry"
registry_filepath=${dist_path}/${registry_file}
# The names/paths for the template config.mk.in and its instantiated
# counterpart.
config_mk_in='config.mk.in'
config_mk_out='config.mk'
config_mk_in_path="${build_dirpath}/${config_mk_in}"
config_mk_out_path="${cur_dirpath}/${config_mk_out}"
# The names/paths for the template bli_config.h.in and its instantiated
# counterpart.
bli_config_h_in='bli_config.h.in'
bli_config_h_out='bli_config.h'
bli_config_h_in_path="${build_dirpath}/${bli_config_h_in}"
bli_config_h_out_path="${cur_dirpath}/${bli_config_h_out}"
# The names/paths for the template bli_addon.h.in and its instantiated
# counterpart.
bli_addon_h_in='bli_addon.h.in'
bli_addon_h_out='bli_addon.h'
bli_addon_h_in_path="${build_dirpath}/${bli_addon_h_in}"
bli_addon_h_out_path="${cur_dirpath}/${bli_addon_h_out}"
# Path to 'mirror-tree.sh' script.
mirror_tree_sh="${build_dirpath}/mirror-tree.sh"
# Path to 'gen-make-frags.sh' script and directory.
gen_make_frags_dirpath="${build_dirpath}/gen-make-frags"
gen_make_frags_sh="${gen_make_frags_dirpath}/gen-make-frag.sh"
# The name of the (top-level) configuration directory.
config_dir='config'
config_dirpath="${dist_path}/${config_dir}"
# The name of the (top-level) kernels directory.
kernels_dir='kernels'
kernels_dirpath="${dist_path}/${kernels_dir}"
# The name of the (top-level) reference kernels directory.
refkern_dir='ref_kernels'
refkern_dirpath="${dist_path}/${refkern_dir}"
# The root directory of the BLIS framework.
frame_dir='frame'
frame_dirpath="${dist_path}/${frame_dir}"
# The root directory of the AOCL DTL (tracing/logging) code.
aocldtl_dir='aocl_dtl'
aocldtl_dirpath="${dist_path}/${aocldtl_dir}"
# The names of the addons.
addon_dir='addon'
addon_dirpath="${dist_path}/${addon_dir}"
# The name of the sandbox directory.
sandbox_dir='sandbox'
sandbox_dirpath="${dist_path}/${sandbox_dir}"
# The name of the directory in which object files will be kept.
obj_dir='obj'
obj_dirpath="${cur_dirpath}/${obj_dir}"
# The name of the directory in which libraries will be kept.
lib_dir='lib'
lib_dirpath="${cur_dirpath}/${lib_dir}"
# The name of the directory in which headers will be kept.
include_dir='include'
include_dirpath="${cur_dirpath}/${include_dir}"
# The name of the directory in which the BLAS test suite is kept.
blastest_dir='blastest'
# The name of the directory in which the BLIS test suite is kept.
testsuite_dir='testsuite'
# -- Version-related --
# The shared library (.so) version file.
so_version_file='so_version'
so_version_filepath="${dist_path}/${so_version_file}"
# The major and minor/build .so version numbers.
so_version_major=''
so_version_minorbuild=''
# -- configure options --
# Define the default prefix so that the print_usage() function can
# output it in the --help text.
prefix_def='/usr/local'
# The installation prefix, assigned its default value, and a flag to
# track whether or not it was given by the user.
prefix=${prefix_def}
prefix_flag=''
# The installation exec_prefix, assigned its default value, and a flag to
# track whether or not it was given by the user.
exec_prefix='${prefix}'
exec_prefix_flag=''
# The installation libdir, assigned its default value, and a flag to
# track whether or not it was given by the user.
libdir='${exec_prefix}/lib'
libdir_flag=''
# The installation includedir, assigned its default value, and a flag to
# track whether or not it was given by the user.
includedir='${prefix}/include'
includedir_flag=''
# The installation sharedir, assigned its default value, and a flag to
# track whether or not it was given by the user.
sharedir='${prefix}/share'
sharedir_flag=''
# The preset value of CFLAGS and LDFLAGS (ie: compiler and linker flags
# to use in addition to those determined by the build system).
cflags_preset=''
ldflags_preset=''
# The user-given debug type and a flag indicating it was given.
debug_type=''
debug_flag=''
# The system flag.
enable_system='yes'
# The threading flag.
threading_model='off'
# The method of assigning micropanels to threads in the JR and IR loops.
thread_part_jrir='slab'
# Option variables.
quiet_flag=''
show_config_list=''
# Additional flags.
enable_verbose='no'
enable_arg_max_hack='no'
enable_static='yes'
enable_shared='yes'
enable_rpath='no'
export_shared='public'
enable_pba_pools='yes'
enable_sba_pools='yes'
enable_mem_tracing='no'
int_type_size=0
blas_int_type_size=32
enable_blas='yes'
enable_cblas='no'
enable_mixed_dt='yes'
enable_mixed_dt_extra_mem='yes'
enable_mnk1_matrix='yes'
enable_tiny_matrix='yes'
enable_small_matrix='yes'
enable_sup_handling='yes'
enable_small_matrix_trsm='yes'
enable_trsm_preinversion='yes'
enable_memkind='' # The default memkind value is determined later on.
enable_aocl_dynamic='yes'
enable_security_flags='yes'
force_version='no'
complex_return='default'
disable_blis_arch_type='unset'
rename_blis_arch_type='BLIS_ARCH_TYPE'
rename_blis_model_type='BLIS_MODEL_TYPE'
# DTL tracing/logging flag.
enable_aocl_dtl='off'
aocl_dtl_trace_level_number='unset'
# The addon flag and names.
addon_flag=''
addon_list=''
# The sandbox flag and name.
sandbox_flag=''
sandbox=''
# -- Configuration registry --
# The name of the chosen configuration (the configuration "family").
config_name=''
# The list of sub-configurations associated with config_name.
config_list=''
# The list of kernel sets that will be needed by the sub-configurations
# in config_list.
kernel_list=''
# The list of kernel:sub-configuration pairs for all kernels contained
# in kernel_list.
kconfig_map=''
# -- Out-of-tree --
# Whether we are building out-of-tree.
configured_oot="no"
# Dummy file. Used to check whether the cwd is the same as the top-level
# source distribution directory.
dummy_file='_blis_dir_detect.tmp'
# -- Debugging --
# A global flag to help debug the compilation command for the executable
# that configure builds on-the-fly to perform hardware auto-detection.
debug_auto_detect="no"
# -- Command line option/argument parsing ----------------------------------
found=true
while $found = true; do
# Process our command line options.
unset OPTIND
while getopts ":hp:d:e:a:s:t:r:qci:b:-:" opt; do
case $opt in
-)
case "$OPTARG" in
help)
print_usage
;;
quiet)
quiet_flag=1
;;
prefix=*)
prefix_flag=1
prefix=${OPTARG#*=}
;;
exec-prefix=*)
exec_prefix_flag=1
exec_prefix=${OPTARG#*=}
;;
libdir=*)
libdir_flag=1
libdir=${OPTARG#*=}
;;
includedir=*)
includedir_flag=1
includedir=${OPTARG#*=}
;;
sharedir=*)
sharedir_flag=1
sharedir=${OPTARG#*=}
;;
enable-debug)
debug_flag=1
debug_type=noopt
;;
enable-debug=*)
debug_flag=1
debug_type=${OPTARG#*=}
;;
disable-debug)
debug_flag=0
;;
enable-verbose-make)
enable_verbose='yes'
;;
disable-verbose-make)
enable_verbose='no'
;;
enable-arg-max-hack)
enable_arg_max_hack='yes'
;;
disable-arg-max-hack)
enable_arg_max_hack='no'
;;
enable-static)
enable_static='yes'
;;
disable-static)
enable_static='no'
;;
enable-shared)
enable_shared='yes'
;;
disable-shared)
enable_shared='no'
;;
enable-rpath)
enable_rpath='yes'
;;
disable-rpath)
enable_rpath='no'
;;
export-shared=*)
export_shared=${OPTARG#*=}
;;
enable-system)
enable_system='yes'
;;
disable-system)
enable_system='no'
;;
enable-threading=*)
threading_model=${OPTARG#*=}
;;
disable-threading)
threading_model='off'
;;
thread-part-jrir=*)
thread_part_jrir=${OPTARG#*=}
;;
enable-pba-pools)
enable_pba_pools='yes'
;;
disable-pba-pools)
enable_pba_pools='no'
;;
enable-sba-pools)
enable_sba_pools='yes'
;;
disable-sba-pools)
enable_sba_pools='no'
;;
enable-mem-tracing)
enable_mem_tracing='yes'
;;
disable-mem-tracing)
enable_mem_tracing='no'
;;
enable-addon=*)
addon_flag=1
addon_name=${OPTARG#*=}
# Append the addon name to the list.
addon_list="${addon_list} ${addon_name}"
;;
disable-addon)
addon_flag=''
;;
enable-sandbox=*)
sandbox_flag=1
sandbox=${OPTARG#*=}
;;
disable-sandbox)
sandbox_flag=''
;;
int-size=*)
int_type_size=${OPTARG#*=}
;;
blas-int-size=*)
blas_int_type_size=${OPTARG#*=}
;;
enable-blas)
enable_blas='yes'
;;
disable-blas)
enable_blas='no'
;;
enable-cblas)
enable_cblas='yes'
;;
disable-cblas)
enable_cblas='no'
;;
enable-mixed-dt)
enable_mixed_dt='yes'
;;
disable-mixed-dt)
enable_mixed_dt='no'
;;
enable-mixed-dt-extra-mem)
enable_mixed_dt_extra_mem='yes'
;;
disable-mixed-dt-extra-mem)
enable_mixed_dt_extra_mem='no'
;;
enable-mnk1-matrix)
enable_mnk1_matrix='yes'
;;
disable-mnk1-matrix)
enable_mnk1_matrix='no'
;;
enable-tiny-matrix)
enable_tiny_matrix='yes'
;;
disable-tiny-matrix)
enable_tiny_matrix='no'
;;
enable-small-matrix)
enable_small_matrix='yes'
;;
disable-small-matrix)
enable_small_matrix='no'
;;
enable-sup-handling)
enable_sup_handling='yes'
;;
disable-sup-handling)
enable_sup_handling='no'
;;
enable-small-matrix-trsm)
enable_small_matrix_trsm='yes'
;;
disable-small-matrix-trsm)
enable_small_matrix_trsm='no'
;;
with-memkind)
enable_memkind='yes'
;;
without-memkind)
enable_memkind='no'
;;
enable-trsm-preinversion)
enable_trsm_preinversion='yes'
;;
disable-trsm-preinversion)
enable_trsm_preinversion='no'
;;
enable-aocl-dynamic)
enable_aocl_dynamic='yes'
;;
disable-aocl-dynamic)
enable_aocl_dynamic='no'
;;
enable-security-flags)
enable_security_flags='yes'
;;
disable-security-flags)
enable_security_flags='no'
;;
force-version=*)
force_version=${OPTARG#*=}
;;
show-config-list)
show_config_list=1
;;
complex-return=*)
complex_return=${OPTARG#*=}
;;
enable-blis-arch-type)
disable_blis_arch_type='no'
;;
disable-blis-arch-type)
disable_blis_arch_type='yes'
;;
rename-blis-arch-type=*)
rename_blis_arch_type=${OPTARG#*=}
;;
rename-blis-model-type=*)
rename_blis_model_type=${OPTARG#*=}
;;
enable-aocl-dtl=*)
enable_aocl_dtl=${OPTARG#*=}
;;
disable-aocl-dtl=*)
enable_aocl_dtl='off'
;;
aocl-dtl-trace-level=*)
aocl_dtl_trace_level_number=${OPTARG#*=}
;;
*)
print_usage
;;
esac;;
h)
print_usage
;;
p)
prefix_flag=1
prefix=$OPTARG
;;
d)
debug_flag=1
debug_type=$OPTARG
;;
e)
export_shared=$OPTARG
;;
a)
addon_flag=1
addon_name=$OPTARG
# Append the addon name to the list.
addon_list="${addon_list} ${addon_name}"
;;
s)
sandbox_flag=1
sandbox=$OPTARG
;;
q)
quiet_flag=1
;;
t)
threading_model=$OPTARG
;;
r)
thread_part_jrir=$OPTARG
;;
i)
int_type_size=$OPTARG
;;
b)
blas_int_type_size=$OPTARG
;;
c)
show_config_list=1
;;
\?)
print_usage
;;
esac
done
shift $(($OPTIND - 1))
# Parse environment variables
found=false
while [ $# -gt 0 ]; do
case $1 in
*=*)
var=`expr "$1" : '\([^=]*\)='`
value=`expr "$1" : '[^=]*=\(.*\)'`
eval $var=\$value
export $var
shift
found=true
;;
*)
break
;;
esac
done
done
# -- Check the operating system --------------------------------------------
os_name=$(uname -s)
os_vers=$(uname -r)
echo "${script_name}: detected ${os_name} kernel version ${os_vers}."
# Define a single variable off of which we can branch to tell if we are
# building for Windows.
is_win=no
if [[ $os_name == MSYS* ]] || \
[[ $os_name == MINGW* ]] || \
[[ $os_name == CYGWIN* ]] ; then
is_win=yes
fi
# -- Find a python interpreter ---------------------------------------------
# Acquire the python search order. This may vary based on the os found
# above.
python_search_list=$(get_python_search_list)
echo "${script_name}: python interpeter search list is: ${python_search_list}."
# Find a working python interpreter.
found_python=$(select_tool "${python_search_list}" "${PYTHON}")
# If we didn't find any working python interpreters, we print an error
# message.
if [ -z "${found_python}" ]; then
echo "${script_name}: *** Could not find working python interperter! Cannot continue."
exit 1
fi
echo "${script_name}: using '${found_python}' python interpreter."
# -- Check the python version ----------------------------------------------
# Check the python interpreter's version.
get_python_version
check_python
# -- Find a C compiler -----------------------------------------------------
# Acquire the compiler search order. This will vary based on the os found
# above.
cc_search_list=$(get_cc_search_list)
echo "${script_name}: C compiler search list is: ${cc_search_list}."
# Find a working C compiler.
found_cc=$(select_tool "${cc_search_list}" "${CC}")
# If we didn't find any working C compilers, we print an error message.
if [ -z "${found_cc}" ]; then
echo "${script_name}: *** Could not find working C compiler! Cannot continue."
exit 1
fi
echo "${script_name}: using '${found_cc}' C compiler."
# Also check the compiler to see if we are (cross-)compiling for Windows
if ${found_cc} -dM -E - < /dev/null 2> /dev/null | grep -q _WIN32; then
is_win=yes
fi
# -- Find a C++ compiler ---------------------------------------------------
# Acquire the compiler search order. This will vary based on the os
# found above.
cxx_search_list=$(get_cxx_search_list)
echo "${script_name}: C++ compiler search list is: ${cxx_search_list}."
# Find a working C++ compiler. NOTE: We can reuse the select_tool()
# function since it is written in a way that is general-purpose.
found_cxx=$(select_tool "${cxx_search_list}" "${CXX}")
# If we didn't find any working C++ compilers, we print an error message.
if [ -z "${found_cxx}" ]; then
echo "${script_name}: Could not find working C++ compiler! C++ will not be available in sandbox."
found_cxx="c++notfound"
fi
echo "${script_name}: using '${found_cxx}' C++ compiler (for sandbox only)."
# -- Check the compiler version --------------------------------------------
# Initialize the blacklist to empty.
blacklist_init
# Check the compiler's version. Certain versions of certain compilers
# will preclude building certain sub-configurations, which are added
# to a blacklist. We also make note of certain version ranges that
# will be useful to know about later.
get_compiler_version
check_compiler
check_compiler_version_ranges
# Now check the assembler's ability to assemble code. Older versions
# of binutils may not be aware of certain instruction sets. Those
# sub-configurations employing kernels that use such instruction sets
# will also be blacklisted.
get_binutils_version
check_assembler
# Remove duplicates and whitespace from the blacklist.
blacklist_cleanup
if [ -n "${config_blist}" ]; then
echo "${script_name}: configuration blacklist:"
echo "${script_name}: ${config_blist}"
fi
# -- Read the configuration registry ---------------------------------------
# Make sure the config registry file exists and can be opened.
if [ ! -f "${registry_filepath}" ]; then
echo "${script_name}: could not open '${registry_file}' file; cannot continue."
echo "${script_name}: BLIS distribution appears to be incomplete."
echo "${script_name}: *** Please verify source distribution."
exit 1
fi
# Read the registered configuration names and lists into associative
# arrays.
echo -n "${script_name}: reading configuration registry..."
read_registry_file ${registry_filepath}
echo "done."
# Report if additional configurations needed to be blacklisted.
# NOTE: This branch should never execute so long as indirect blacklisting
# is disabled. See comment regarding issue #214 in the definition of
# pass_config_kernel_registries().
if [ -n "${indirect_blist}" ]; then
echo "${script_name}: needed to indirectly blacklist additional configurations:"
echo "${script_name}: ${indirect_blist}"
fi
# -- Acquire the BLIS version ----------------------------------------------
# Set the 'version' variable to the default value: the contents of the
# 'version' file, prefixed with "AOCL-BLAS" and suffixed with the build
# date.
set_default_version "${version_filepath}"
# Initial message.
echo "${script_name}: starting configuration of BLIS ${version}."
# Check if the user requested a custom version string.
if [ "x${force_version}" = "xno" ]; then
echo "${script_name}: configuring with official version string."
else
echo "${script_name}: configuring with custom version string '${force_version}'."
version="${force_version}"
fi
# -- Acquire the shared library (.so) versions -----------------------------
# The first line of the 'so_version' file contains the .so major version.
so_version_major=$(cat ${so_version_filepath} | sed -n "1p")
# The second line contains the minor and build .so version numbers
# (separated by a '.').
so_version_minorbuild=$(cat ${so_version_filepath} | sed -n "2p")
echo "${script_name}: found shared library .so version '${so_version_major}.${so_version_minorbuild}'."
echo "${script_name}: .so major version: ${so_version_major}"
echo "${script_name}: .so minor.build version: ${so_version_minorbuild}"
# -- Various pre-configuration checks --------------------------------------
# Set config_name based on the number of arguments leftover (after command
# line option processing).
if [ $# = "0" ]; then
#configs_avail="auto "$(ls ${config_dirpath})
echo "${script_name}: "
echo "${script_name}: *** No configuration given! ***"
echo "${script_name}: "
echo "${script_name}: Default configuration behavior is not implemented (for your"
echo "${script_name}: own safety). Please re-run '${script_name}' and specify one"
echo "${script_name}: of the existing configurations in the source distribution's"
echo "${script_name} '${registry_file}' file:"
echo "${script_name}: "
#for k in "${!config_registry[@]}"; do
for cr_var in ${!config_registry_*}; do
#v=${config_registry[$k]}
k=${cr_var##config_registry_}; v=${!cr_var}
echo "${script_name}: $k (${v})"
done
echo "${script_name}: "
exit 1
elif [ $# != "1" ]; then # more than one configuration argument given.
print_usage
fi
if [ $1 = "auto" ]; then
echo "${script_name}: automatic configuration requested."
# Call the auto_detect() function and save the returned string in
# config_name.
config_name=$(auto_detect)
#config_name="generic"
# Debugging stuff. When confirming the behavior of auto_detect(),
# it is useful to output ${config_name}, which in theory could be
# set temporarily to something other than the config_name, such as
# the compilation command.
if [ "${debug_auto_detect}" = "yes" ]; then
echo "auto-detect program compilation command: ${config_name}"
exit 1
fi
echo "${script_name}: hardware detection driver returned '${config_name}'."
# If the auto-detect code returned the "generic" string, it means we
# were unable to automatically detect the user's hardware type. While
# this is going to be a rare event, it will likely lead the user to
# experience much lower performance than expected, and thus we will
# warn them about it at the end of the configure output (to increase
# the chances that they see it).
if [ "${config_name}" = "generic" ]; then
warn_user_generic=1
else
warn_user_generic=0
fi
else
# Use the command line argument as the configuration name.
config_name=$1
echo "${script_name}: manual configuration requested; configuring with '${config_name}'."
fi
# Use the selected config name to look up the list of configurations
# and kernels associated with that name.
#config_list=${config_registry[${config_name}]}
#kernel_list=${kernel_registry[${config_name}]}
config_list=$(query_array "config_registry" ${config_name})
kernel_list=$(query_array "kernel_registry" ${config_name})
# Use the config_registry and kernel_registry to build a kconfig_registry
# for the selected config_name.
build_kconfig_registry "${config_name}"
# Print the configuration list and kernel list, if requested.
if [ "${show_config_list}" == "1" ]; then
echo "${script_name}: configuration list:"
#for k in "${!config_registry[@]}"; do
for cr_var in ${!config_registry_*}; do
#v=${config_registry[$k]}
k=${cr_var##config_registry_}; v=${!cr_var}
echo "${script_name}: $k: ${v}"
done
echo "${script_name}: kernel list:"
#for k in "${!kernel_registry[@]}"; do
for kr_var in ${!kernel_registry_*}; do
#v=${kernel_registry[$k]}
k=${kr_var##kernel_registry_}; v=${!kr_var}
echo "${script_name}: $k: ${v}"
done
echo "${script_name}: kernel-to-config map for '${config_name}':"
#for k in "${!kconfig_registry[@]}"; do
for kc_var in ${!kconfig_registry_*}; do
#v=${kconfig_registry[$k]}
k=${kc_var##kconfig_registry_}; v=${!kc_var}
echo "${script_name}: $k: ${v}"
done
fi
# For each kernel in the kernel list, reduce the list of associated
# sub-configurations (in the kconfig_registry) to a singleton using
# the following rules:
# 1. If the list is a singleton, use that name.
# 2. If the list contains a sub-configuration name that matches the
# kernel name, use that name.
# 3. Otherwise, use the last name in the list.
# We use the chosen singleton to create a "kernel:subconfig" pair, which
# we accumulate into a list. This list is the kernel-to-config map, or
# kconfig_map.
# We use a sorted version of kernel_list so that it ends up matching the
# display order of the kconfig_registry above.
kernel_list_sort=$(echo ${kernel_list} | xargs -n1 | sort -u)
kconfig_map=""
for kernel in ${kernel_list_sort}; do
#configs="${kconfig_registry[$kernel]}"
configs=$(query_array "kconfig_registry" ${kernel})
has_one_kernel=$(is_singleton "${configs}")
contains_kernel=$(is_in_list "${kernel}" "${configs}")
# Check if the list is a singleton.
if [ "${has_one_kernel}" == "true" ]; then
reducedclist="${configs}"
# Check if the list contains a sub-config name that matches the kernel.
elif [ "${contains_kernel}" == "true" ]; then
reducedclist="${kernel}"
# Otherwise, use the last name.
else
last_config=${configs##* }
reducedclist="${last_config}"
fi
# Create a new "kernel:subconfig" pair and add it to the kconfig_map
# list, removing whitespace.
new_pair="${kernel}:${reducedclist}"
kconfig_map=$(canonicalize_ws "${kconfig_map} ${new_pair}")
done
if [ "${show_config_list}" == "1" ]; then
echo "${script_name}: kernel-to-config map for '${config_name}' (chosen pairs):"
for k in ${kconfig_map}; do
echo "${script_name}: $k"
done
fi
echo "${script_name}: checking configuration against contents of '${registry_file}'."
# First, ensure that the config name is registered (ie: it is present
# in the config_registry file).
if [ -z "${config_list}" ]; then
# NOTE: This branch should never execute when using auto-detection,
# but we have it here just in case.
if [ $1 = "auto" ]; then
echo "${script_name}: 'auto-detected configuration '${config_name}' is NOT registered!"
echo "${script_name}: "
echo "${script_name}: *** Cannot continue with unregistered configuration '${config_name}'. ***"
echo "${script_name}: "
exit 1;
else
# At this point, we know: (a) config_list is empty; and (b) the user
# requested manual configuration. If the config_name given by the
# user is present in the configuration blacklist (config_blist),
# then we can deduce why the config_list is empty: because the only
# subconfig implied by config_name is blacklisted. Thus, we cannot
# proceed.
if [ $(is_in_list "${config_name}" "${config_blist}") == "true" ]; then
echo "${script_name}: 'user-specified configuration '${config_name}' is blacklisted!"
echo "${script_name}: "
echo "${script_name}: *** Cannot continue with blacklisted configuration '${config_name}'. ***"
echo "${script_name}: *** Try updating your compiler and/or assembler (binutils) versions. ***"
echo "${script_name}: "
exit 1;
else
# If config_name is NOT present in config_blist, then we know
# that config_list is empty simply because config_name is
# unregistered.
echo "${script_name}: 'user-specified configuration '${config_name}' is NOT registered!"
echo "${script_name}: "
echo "${script_name}: *** Cannot continue with unregistered configuration '${config_name}'. ***"
echo "${script_name}: "
exit 1;
fi
fi
else
# This branch executes when the configuration is found to be present
# (i.e. registered) in the config_registry file.
echo "${script_name}: configuration '${config_name}' is registered."
echo "${script_name}: '${config_name}' is defined as having the following sub-configurations:"
echo "${script_name}: ${config_list}"
echo "${script_name}: which collectively require the following kernels:"
echo "${script_name}: ${kernel_list}"
fi
# Based on the number of sub-configurations, set default value for disable_blis_arch_type
# (if user hasn't set option). BLIS_ARCH_TYPE functionality only makes sense for use with
# processor families containing multiple sub-configurations, but user can force the
# functionality to be enabled/disabled with --enable-blis-arch-type/--disable-blis-arch-type
# configure options.
if [ "x${disable_blis_arch_type}" = "xunset" ]; then
config_list_count=$(echo ${config_list} |wc -w)
if [ "x${config_list_count}" = "x1" ]; then
disable_blis_arch_type='yes'
else
disable_blis_arch_type='no'
fi
fi
echo "${script_name}: checking sub-configurations:"
# Validate every sub-configuration named in config_list: each one must have
# a non-empty entry in the config registry and a directory of the same name
# under config_dirpath. The script aborts at the first failure.
for conf in ${config_list}; do
    # Look up the registry entry for the current sub-configuration. An
    # empty result means it was never entered into the config_registry,
    # i.e. it is unregistered.
    this_clist=$(query_array "config_registry" ${conf})
    if [[ -n "${this_clist}" ]]; then
        # No trailing newline: the directory check below completes the line.
        echo -n "${script_name}: '${conf}' is registered."
    else
        echo "${script_name}: '${conf}' is NOT registered!"
        echo "${script_name}: "
        echo "${script_name}: *** Cannot continue with unregistered configuration '${conf}'. ***"
        echo "${script_name}: "
        exit 1;
    fi
    # The registered sub-configuration must also exist on disk.
    if [[ -d "${config_dirpath}/${conf}" ]]; then
        echo "..and exists."
    else
        echo "..but does NOT exist!"
        echo "${script_name}: "
        echo "${script_name}: *** Cannot continue with nonexistent configuration '${conf}'. ***"
        echo "${script_name}: "
        exit 1;
    fi
done
echo "${script_name}: checking sub-configurations' requisite kernels:"
# Every kernel set required by the chosen configuration must correspond to
# an existing sub-directory of kernels_dirpath; abort on the first one that
# is missing.
for kernel in ${kernel_list}; do
    # No trailing newline: the verdict below completes the line.
    echo -n "${script_name}: '${kernel}' kernels..."
    if [[ -d "${kernels_dirpath}/${kernel}" ]]; then
        echo "exist."
    else
        echo "do NOT exist!"
        echo "${script_name}: "
        echo "${script_name}: *** Cannot continue with nonexistent kernel '${kernel}'. ***"
        echo "${script_name}: "
        exit 1;
    fi
done
# In order to determine the default behavior of the --with[out]-memkind
# option, we try to detect whether libmemkind is available. If it is,
# the default implied option will be --with-memkind; otherwise, it will be
# --without-memkind. The captured output is compared against "yes" further
# below, when the final value of enable_memkind is decided.
has_memkind=$(has_libmemkind)
# Try to determine whether the chosen compiler supports #pragma omp simd.
# As above, the captured output is later compared against "yes".
pragma_omp_simd=$(has_pragma_omp_simd)
# -- Prepare variables for substitution into template files ----------------
# For each installation directory option, report whether the user supplied
# it explicitly (its *_flag variable is non-empty) or whether the default
# is being used.

# --prefix
if [ -z "${prefix_flag}" ]; then
    echo "${script_name}: no install prefix option given; defaulting to '${prefix}'."
else
    echo "${script_name}: detected --prefix='${prefix}'."
fi

# --exec-prefix
if [ -z "${exec_prefix_flag}" ]; then
    echo "${script_name}: no install exec_prefix option given; defaulting to PREFIX."
else
    echo "${script_name}: detected --exec-prefix='${exec_prefix}'."
fi

# --libdir
if [ -z "${libdir_flag}" ]; then
    echo "${script_name}: no install libdir option given; defaulting to EXECPREFIX/lib."
else
    echo "${script_name}: detected --libdir='${libdir}'."
fi

# --includedir
if [ -z "${includedir_flag}" ]; then
    echo "${script_name}: no install includedir option given; defaulting to PREFIX/include."
else
    echo "${script_name}: detected --includedir='${includedir}'."
fi

# --sharedir
if [ -z "${sharedir_flag}" ]; then
    echo "${script_name}: no install sharedir option given; defaulting to PREFIX/share."
else
    echo "${script_name}: detected --sharedir='${sharedir}'."
fi

# Summarize the installation directories that were settled on. The values
# are deliberately left unquoted, exactly as before, so the printed text
# is unchanged.
echo "${script_name}: final installation directories:"
echo "${script_name}: prefix: "${prefix}
echo "${script_name}: exec_prefix: "${exec_prefix}
echo "${script_name}: libdir: "${libdir}
echo "${script_name}: includedir: "${includedir}
echo "${script_name}: sharedir: "${sharedir}
echo "${script_name}: NOTE: the variables above can be overridden when running make."
# Record any CFLAGS handed in through the environment so they can be
# prepended to the compiler flags later on.
if [[ -z "${CFLAGS}" ]]; then
    cflags_preset=''
    echo "${script_name}: no preset CFLAGS detected."
else
    cflags_preset="${CFLAGS}"
    echo "${script_name}: detected preset CFLAGS; prepending:"
    echo "${script_name}: ${cflags_preset}"
fi
# Same treatment for LDFLAGS.
if [[ -z "${LDFLAGS}" ]]; then
    ldflags_preset=''
    echo "${script_name}: no preset LDFLAGS detected."
else
    ldflags_preset="${LDFLAGS}"
    echo "${script_name}: detected preset LDFLAGS; prepending:"
    echo "${script_name}: ${ldflags_preset}"
fi
# Settle the final value of debug_type. Without the debug flag it is
# 'off'; with the flag, 'opt' and 'sde' are kept and anything else is
# normalized to 'noopt'.
if [[ -z "${debug_flag}" ]]; then
    debug_type='off'
    echo "${script_name}: debug symbols disabled."
else
    case "${debug_type}" in
        opt)
            echo "${script_name}: enabling debug symbols with optimizations."
            ;;
        sde)
            debug_type='sde'
            echo "${script_name}: enabling SDE processor emulation."
            ;;
        *)
            debug_type='noopt'
            echo "${script_name}: enabling debug symbols; optimizations disabled."
            ;;
    esac
fi
# Report whether verbose make output was requested.
case "${enable_verbose}" in
    yes)
        echo "${script_name}: enabling verbose make output. (disable with 'make V=0'.)"
        ;;
    *)
        echo "${script_name}: disabling verbose make output. (enable with 'make V=1'.)"
        ;;
esac
# Report whether the ARG_MAX hack was requested.
case "${enable_arg_max_hack}" in
    yes)
        echo "${script_name}: enabling ARG_MAX hack."
        ;;
    *)
        echo "${script_name}: disabling ARG_MAX hack."
        ;;
esac
# Decide which library flavors are built. enable_shared_01 starts out as 1
# and is only cleared for a static-only build. Any combination other than
# the three handled below (including both set to 'no') is rejected.
enable_shared_01=1
case "${enable_static}:${enable_shared}" in
    yes:yes)
        echo "${script_name}: building BLIS as both static and shared libraries."
        ;;
    yes:no)
        echo "${script_name}: building BLIS as a static library (shared library disabled)."
        enable_shared_01=0
        ;;
    no:yes)
        echo "${script_name}: building BLIS as a shared library (static library disabled)."
        ;;
    *)
        echo "${script_name}: Both static and shared libraries were disabled."
        echo "${script_name}: *** Please enable one (or both) to continue."
        exit 1
        ;;
esac
# Validate the --export-shared argument. Only 'all' and 'public' are
# accepted; the request is only reported as honored when a shared library
# is actually being built.
case "${export_shared}" in
    all)
        if [[ "${enable_shared}" == "yes" ]]; then
            echo "${script_name}: exporting all symbols within shared library."
        else
            echo "${script_name}: ignoring request to export all symbols within shared library."
        fi
        ;;
    public)
        if [[ "${enable_shared}" == "yes" ]]; then
            echo "${script_name}: exporting only public symbols within shared library."
        fi
        ;;
    *)
        echo "${script_name}: *** Invalid argument '${export_shared}' to --export-shared option given."
        echo "${script_name}: *** Please use 'public' or 'all'."
        exit 1
        ;;
esac
# Determine whether operating system support is compiled in. A systemless
# build cannot use threads, so the threading model is forced to 'off'.
if [[ "${enable_system}" != "yes" ]]; then
    echo "${script_name}: disabling operating system support."
    echo "${script_name}: WARNING: all threading will be disabled!"
    enable_system_01=0
    threading_model='off'
else
    echo "${script_name}: enabling operating system support."
    enable_system_01=1
fi
# Validate the requested threading model and standardize its spelling.
# NOTE: 'omp' is deprecated but still supported; 'openmp' is preferred.
enable_openmp='no'
enable_openmp_01=0
enable_pthreads='no'
enable_pthreads_01=0
case "${threading_model}" in
    auto)
        echo "${script_name}: determining the threading model automatically."
        ;;
    openmp|omp)
        echo "${script_name}: using OpenMP for threading."
        enable_openmp='yes'
        enable_openmp_01=1
        threading_model="openmp" # Standardize the value.
        ;;
    pthreads|pthread|posix)
        echo "${script_name}: using POSIX threads for threading."
        enable_pthreads='yes'
        enable_pthreads_01=1
        threading_model="pthreads" # Standardize the value.
        ;;
    off|no|none)
        echo "${script_name}: threading is disabled."
        threading_model="off"
        ;;
    *)
        echo "${script_name}: *** Unsupported threading model: ${threading_model}."
        exit 1
        ;;
esac
# Check the method of assigning micropanels to threads in the JR and IR
# loops. Exactly one of the two 0/1 flags is set for a valid request
# ('slab' or 'rr'); any other value aborts configuration.
enable_jrir_slab_01=0
enable_jrir_rr_01=0
if [ "x${thread_part_jrir}" = "xslab" ]; then
    echo "${script_name}: requesting slab threading in jr and ir loops."
    enable_jrir_slab_01=1
elif [ "x${thread_part_jrir}" = "xrr" ]; then
    echo "${script_name}: requesting round-robin threading in jr and ir loops."
    enable_jrir_rr_01=1
else
    # Report the offending partitioning method itself (the message
    # previously printed the unrelated threading model).
    echo "${script_name}: *** Unsupported method of thread partitioning in jr and ir loops: ${thread_part_jrir}."
    exit 1
fi
# Convert 'yes' and 'no' flags to booleans. Anything other than 'yes' is
# treated as disabled.

# Memory pools for packing blocks.
if [[ "${enable_pba_pools}" != "yes" ]]; then
    echo "${script_name}: internal memory pools for packing blocks are disabled."
    enable_pba_pools_01=0
else
    echo "${script_name}: internal memory pools for packing blocks are enabled."
    enable_pba_pools_01=1
fi
# Memory pools for small blocks.
if [[ "${enable_sba_pools}" != "yes" ]]; then
    echo "${script_name}: internal memory pools for small blocks are disabled."
    enable_sba_pools_01=0
else
    echo "${script_name}: internal memory pools for small blocks are enabled."
    enable_sba_pools_01=1
fi
# Memory tracing output.
if [[ "${enable_mem_tracing}" != "yes" ]]; then
    echo "${script_name}: memory tracing output is disabled."
    enable_mem_tracing_01=0
else
    echo "${script_name}: memory tracing output is enabled."
    enable_mem_tracing_01=1
fi
# Settle the final libmemkind setting from the detection result
# (has_memkind) and the user's explicit request, if any (enable_memkind).
if [[ "${has_memkind}" != "yes" ]]; then
    # Library not available: disable regardless of what was requested.
    echo "${script_name}: libmemkind not found; disabling."
    if [[ "${enable_memkind}" == "yes" ]]; then
        echo "${script_name}: cannot honor explicit request to enable libmemkind."
    fi
    enable_memkind="no"
    enable_memkind_01=0
elif [[ -z "${enable_memkind}" ]]; then
    # No explicit option was given one way or the other, so the detection
    # result (in this case "yes") determines the default.
    echo "${script_name}: libmemkind found; default is to enable use."
    enable_memkind="yes"
    enable_memkind_01=1
elif [[ "${enable_memkind}" == "yes" ]]; then
    echo "${script_name}: received explicit request to enable libmemkind."
    enable_memkind="yes"
    enable_memkind_01=1
else
    echo "${script_name}: received explicit request to disable libmemkind."
    enable_memkind="no"
    enable_memkind_01=0
fi
# Translate the #pragma omp simd detection result into a 0/1 flag.
case "${pragma_omp_simd}" in
    yes)
        echo "${script_name}: compiler appears to support #pragma omp simd."
        enable_pragma_omp_simd_01=1
        ;;
    *)
        echo "${script_name}: compiler appears to not support #pragma omp simd."
        enable_pragma_omp_simd_01=0
        ;;
esac
# CBLAS compatibility layer. Enabling CBLAS also forces the BLAS layer on,
# which is why this check must come before the BLAS check below.
if [[ "${enable_cblas}" != "yes" ]]; then
    echo "${script_name}: the CBLAS compatibility layer is disabled."
    enable_cblas_01=0
else
    echo "${script_name}: the CBLAS compatibility layer is enabled."
    enable_cblas_01=1
    enable_blas='yes'
fi
# BLAS compatibility layer.
if [[ "${enable_blas}" != "yes" ]]; then
    echo "${script_name}: the BLAS compatibility layer is disabled."
    enable_blas_01=0
else
    echo "${script_name}: the BLAS compatibility layer is enabled."
    enable_blas_01=1
fi
# Mixed datatype support. The extra-memory optimizations are only
# considered when mixed datatype support itself is enabled.
if [[ "${enable_mixed_dt}" != "yes" ]]; then
    echo "${script_name}: mixed datatype support is disabled."
    enable_mixed_dt_extra_mem_01=0
    enable_mixed_dt_01=0
else
    echo "${script_name}: mixed datatype support is enabled."
    if [[ "${enable_mixed_dt_extra_mem}" != "yes" ]]; then
        echo "${script_name}: mixed datatype optimizations requiring extra memory are disabled."
        enable_mixed_dt_extra_mem_01=0
    else
        echo "${script_name}: mixed datatype optimizations requiring extra memory are enabled."
        enable_mixed_dt_extra_mem_01=1
    fi
    enable_mixed_dt_01=1
fi
# Translate the matrix-handling feature switches into 0/1 flags. Anything
# other than 'yes' counts as disabled.

# M,N,K=1 matrix handling.
if [[ "${enable_mnk1_matrix}" != "yes" ]]; then
    echo "${script_name}: M,N,K=1 matrix handling is disabled."
    enable_mnk1_matrix_01=0
else
    echo "${script_name}: M,N,K=1 matrix handling is enabled."
    enable_mnk1_matrix_01=1
fi
# Tiny matrix handling.
if [[ "${enable_tiny_matrix}" != "yes" ]]; then
    echo "${script_name}: tiny matrix handling is disabled."
    enable_tiny_matrix_01=0
else
    echo "${script_name}: tiny matrix handling is enabled."
    enable_tiny_matrix_01=1
fi
# Small matrix handling.
if [[ "${enable_small_matrix}" != "yes" ]]; then
    echo "${script_name}: small matrix handling is disabled."
    enable_small_matrix_01=0
else
    echo "${script_name}: small matrix handling is enabled."
    enable_small_matrix_01=1
fi
# SUP matrix handling.
if [[ "${enable_sup_handling}" != "yes" ]]; then
    echo "${script_name}: SUP matrix handling is disabled."
    enable_sup_handling_01=0
else
    echo "${script_name}: SUP matrix handling is enabled."
    enable_sup_handling_01=1
fi
# TRSM small matrix handling.
if [[ "${enable_small_matrix_trsm}" != "yes" ]]; then
    echo "${script_name}: TRSM small matrix handling is disabled."
    enable_small_matrix_trsm_01=0
else
    echo "${script_name}: TRSM small matrix handling is enabled."
    enable_small_matrix_trsm_01=1
fi
# trsm diagonal element pre-inversion.
if [[ "${enable_trsm_preinversion}" != "yes" ]]; then
    echo "${script_name}: trsm diagonal element pre-inversion is disabled."
    enable_trsm_preinversion_01=0
else
    echo "${script_name}: trsm diagonal element pre-inversion is enabled."
    enable_trsm_preinversion_01=1
fi
# Security hardening flags: reported only, no 0/1 flag is derived here.
if [[ "${enable_security_flags}" != "yes" ]]; then
    echo "${script_name}: security hardening flags are disabled."
else
    echo "${script_name}: security hardening flags are enabled."
fi
# Map the AOCL DTL setting ('trace', 'log' or 'all') onto the trace and log
# 0/1 flags. Any other value leaves both features off.
enable_aocl_dtl_trace_01=0
enable_aocl_dtl_log_01=0
case "${enable_aocl_dtl}" in
    trace)
        enable_aocl_dtl_trace_01=1
        ;;
    log)
        enable_aocl_dtl_log_01=1
        ;;
    all)
        enable_aocl_dtl_trace_01=1
        enable_aocl_dtl_log_01=1
        ;;
esac
# Fall back to trace level 5 when the level still holds the 'unset' marker.
if [[ "${aocl_dtl_trace_level_number}" == "unset" ]]; then
    aocl_dtl_trace_level_number=5
fi
# AOCL dynamic threading (dynamic selection of the number of threads) is
# only honored when multithreading is enabled; otherwise the request is
# downgraded to 'no'.
if [[ "${enable_aocl_dynamic}" != "yes" ]]; then
    echo "${script_name}: dynamic selection of number of threads is disabled"
    enable_aocl_dynamic_01=0
elif [[ "${threading_model}" != "off" ]]; then
    echo "${script_name}: dynamic selection of number of threads is enabled"
    enable_aocl_dynamic_01=1
else
    enable_aocl_dynamic_01=0
    enable_aocl_dynamic="no"
    echo "${script_name}: dynamic threading is disabled as multithreading is disabled"
fi
# Report the integer size used by the BLIS API.
case "${int_type_size}" in
    32)
        echo "${script_name}: the BLIS API integer size is 32-bit."
        ;;
    64)
        echo "${script_name}: the BLIS API integer size is 64-bit."
        ;;
    *)
        echo "${script_name}: the BLIS API integer size is automatically determined."
        ;;
esac
# Report the integer size used by the BLAS/CBLAS API.
case "${blas_int_type_size}" in
    32)
        echo "${script_name}: the BLAS/CBLAS API integer size is 32-bit."
        ;;
    64)
        echo "${script_name}: the BLAS/CBLAS API integer size is 64-bit."
        ;;
    *)
        echo "${script_name}: the BLAS/CBLAS API integer size is automatically determined."
        ;;
esac
# Disallow the simultaneous use of 64-bit integers in the BLAS and
# 32-bit integers in BLIS.
if [[ "${blas_int_type_size}" == "64" && "${int_type_size}" == "32" ]]; then
    echo "${script_name}: *** To avoid the possibility of truncation, we do not allow use of 64-bit integers in the BLAS API with 32-bit integers in BLIS. Please use a different configuration of integers."
    exit 1
fi
# Check if addons were given. When they were, the requested list is
# de-duplicated, filtered against compiler requirements, and every
# surviving addon must exist as a sub-directory of addon_dirpath.
# enable_addons_01 ends up as 1 only if at least one addon survives.
if [ -n "${addon_flag}" ]; then
    # Remove duplicates in the addon list, if they exist.
    addon_list=$(rm_duplicate_words_simple "${addon_list}")
    echo "${script_name}: configuring with addons:"
    # Rebuild the list, dropping addons the chosen compiler cannot build.
    new_addon_list=""
    for addon in ${addon_list}; do
        # Check if this is the aocl_gemm addon and verify compiler version.
        if [ "${addon}" = "aocl_gemm" ]; then
            if [ "${cc_vendor}" = "gcc" ]; then
                # aocl_gemm addon (LPGEMM) requires GCC 11.2 or newer
                # due to AVX-512 intrinsics and optimization requirements.
                if [ "${cc_major}" -lt 11 ] ||
                   { [ "${cc_major}" -eq 11 ] && [ "${cc_minor}" -lt 2 ]; }; then
                    echo "${script_name}: warning: aocl_gemm addon requires GCC 11.2 or newer."
                    echo "${script_name}: warning: Current GCC version is ${cc_version}."
                    echo "${script_name}: warning: Skipping aocl_gemm addon."
                    continue
                fi
            elif [ "${cc_vendor}" = "clang" ]; then
                # aocl_gemm addon (LPGEMM) requires Clang 12.0 or newer
                # due to AVX-512 intrinsics and C++17 requirements.
                if [ "${cc_major}" -lt 12 ]; then
                    echo "${script_name}: warning: aocl_gemm addon requires Clang 12.0 or newer."
                    echo "${script_name}: warning: Current Clang version is ${cc_version}."
                    echo "${script_name}: warning: Skipping aocl_gemm addon."
                    continue
                fi
            fi
        fi
        # Append with a single separating blank and no leading blank.
        new_addon_list="${new_addon_list:+${new_addon_list} }${addon}"
    done
    addon_list="${new_addon_list}"
    if [ -z "${addon_list}" ]; then
        # Every requested addon was filtered out (or none was named), so
        # do not advertise addon support in the generated headers.
        echo "${script_name}: no addons remain after compiler checks; configuring with no addons."
        enable_addons_01=0
    else
        for addon in ${addon_list}; do
            echo "${script_name}: ${addon_dir}/${addon}"
            addon_fullpath="${addon_dirpath}/${addon}"
            if [ ! -d "${addon_fullpath}" ]; then
                echo "${script_name}: requested addon sub-directory does not exist! Cannot continue."
                echo "${script_name}: *** Please verify addon existence and name."
                exit 1
            fi
        done
        enable_addons_01=1
    fi
else
    echo "${script_name}: configuring with no addons."
    enable_addons_01=0
fi
# Check if a sandbox (alternate gemm implementation) was given. If so, its
# sub-directory under sandbox_dirpath must exist.
if [[ -z "${sandbox_flag}" ]]; then
    echo "${script_name}: configuring for conventional gemm implementation."
    enable_sandbox_01=0
else
    echo "${script_name}: configuring for alternate gemm implementation:"
    echo "${script_name}: ${sandbox_dir}/${sandbox}"
    sandbox_fullpath="${sandbox_dirpath}/${sandbox}"
    if [[ ! -d "${sandbox_fullpath}" ]]; then
        echo "${script_name}: requested sandbox sub-directory does not exist! Cannot continue."
        echo "${script_name}: *** Please verify sandbox existence and name."
        exit 1
    fi
    enable_sandbox_01=1
fi
# Check the method used for returning complex numbers. A value of
# 'default' is resolved to 'gnu' or 'intel' here, based on the Fortran
# compiler named by FC (if any); the result is then converted to the 0/1
# flag complex_return_intel01. Any other value aborts configuration.
if [ "x${complex_return}" = "xdefault" ]; then
    if [ -n "${FC}" ]; then
        # Determine the complex return type from the given Fortran compiler.
        # Query the full vendor version string output. This includes the
        # version number along with (potentially) a bunch of other textual
        # clutter.
        # NOTE: This maybe should use merged stdout/stderr rather than only
        # stdout. But it works for now.
        vendor_string="$(${FC} --version 2>/dev/null)"
        # Query the compiler "vendor" (ie: the compiler's simple name).
        # 'grep -E' is used rather than the obsolescent 'egrep', which
        # recent GNU grep releases warn about on every invocation.
        # The last part ({ read first rest ; echo $first ; }) keeps only
        # the first match, a workaround originally added for OS X's egrep
        # matching behavior.
        fc_vendor=$(echo "${vendor_string}" | grep -E -o 'ifort|GNU' | { read first rest ; echo $first ; })
        if [ "x${fc_vendor}" = "xifort" ]; then
            complex_return='intel'
        elif [ "x${fc_vendor}" = "xGNU" ]; then
            complex_return='gnu'
        else
            # Unknown vendor: fall back to the GNU convention.
            echo "${script_name}: unable to determine Fortran compiler vendor!"
            complex_return='gnu'
        fi
    else
        # No Fortran compiler specified: use the GNU convention.
        complex_return='gnu'
    fi
fi
if [ "x${complex_return}" = "xgnu" ]; then
    complex_return_intel01='0'
elif [ "x${complex_return}" = "xintel" ]; then
    complex_return_intel01='1'
else
    echo "${script_name}: unknown complex return type \"${complex_return}\"! Cannot continue."
    echo "${script_name}: *** Acceptable values are \"gnu\" and \"intel\"."
    exit 1
fi
# Convert disable_blis_arch_type into its 0/1 form, noting when the
# environment-variable code path selection is switched off.
if [[ "${disable_blis_arch_type}" != "yes" ]]; then
    disable_blis_arch_type_01='0'
else
    echo "${script_name}: user selection of code path using AOCL_ENABLE_INSTRUCTIONS,"
    echo "${script_name}: BLIS_ARCH_TYPE and BLIS_MODEL_TYPE env vars is disabled."
    disable_blis_arch_type_01='1'
fi
# Mention a custom env var name replacing BLIS_ARCH_TYPE, if one was given.
if [[ "${rename_blis_arch_type}" != "BLIS_ARCH_TYPE" ]]; then
    echo "${script_name}: configuring with BLIS_ARCH_TYPE env var renamed to '${rename_blis_arch_type}'."
fi
# Mention a custom env var name replacing BLIS_MODEL_TYPE, if one was given.
if [[ "${rename_blis_model_type}" != "BLIS_MODEL_TYPE" ]]; then
    echo "${script_name}: configuring with BLIS_MODEL_TYPE env var renamed to '${rename_blis_model_type}'."
fi
echo "${script_name}: configuring complex return type as \"${complex_return}\"."
# Variables that may contain forward slashes, such as paths, need extra
# escaping before they are spliced into the '/'-delimited sed commands
# used to instantiate the templates. The sed expression below rewrites
# every '/' as '\/'.
slash_esc_expr='s|/|\\/|g'
os_name_esc=$(echo "${os_name}" | sed "${slash_esc_expr}")
prefix_esc=$(echo "${prefix}" | sed "${slash_esc_expr}")
exec_prefix_esc=$(echo "${exec_prefix}" | sed "${slash_esc_expr}")
libdir_esc=$(echo "${libdir}" | sed "${slash_esc_expr}")
includedir_esc=$(echo "${includedir}" | sed "${slash_esc_expr}")
sharedir_esc=$(echo "${sharedir}" | sed "${slash_esc_expr}")
dist_path_esc=$(echo "${dist_path}" | sed "${slash_esc_expr}")
cc_esc=$(echo "${found_cc}" | sed "${slash_esc_expr}")
cxx_esc=$(echo "${found_cxx}" | sed "${slash_esc_expr}")
python_esc=$(echo "${found_python}" | sed "${slash_esc_expr}")
# RANLIB and AR default to 'ranlib' and 'ar' when unset or empty.
ranlib_esc=$(echo "${RANLIB:-ranlib}" | sed "${slash_esc_expr}")
ar_esc=$(echo "${AR:-ar}" | sed "${slash_esc_expr}")
# LIBPTHREAD defaults to '-lpthread' only when it is unset; an explicitly
# empty value is kept as-is.
libpthread_esc=$(echo "${LIBPTHREAD--lpthread}" | sed "${slash_esc_expr}")
cflags_preset_esc=$(echo "${cflags_preset}" | sed "${slash_esc_expr}")
ldflags_preset_esc=$(echo "${ldflags_preset}" | sed "${slash_esc_expr}")
# Clear libpthread_esc so that no pthreads library is substituted into
# config.mk in two cases: Windows builds with clang (which employ an
# implementation of pthreads that is internal to BLIS), and systemless
# builds (--disable-system).
if [[ ( "$is_win" == "yes" && "$cc_vendor" == "clang" ) || "$enable_system" == "no" ]]; then
    libpthread_esc=
fi
# Typically, there are no slashes in the version variable. However,
# downstream maintainers (such as those for Debian) may create custom
# tags in their local clones such as "upstream/0.4.1", which contain
# slashes. Escaping the version string accommodates those use cases.
version_esc=$(echo "${version}" | sed "${slash_esc_expr}")
# Create a #define for the configuration family (config_name), using the
# upper-cased family name.
uconf=$(echo ${config_name} | tr '[:lower:]' '[:upper:]')
config_name_define="#define BLIS_FAMILY_${uconf}\n"
# Create an AOCL-specific #define. The macro is enabled only for zen
# family configurations, which lets TRSM use different cache block sizes
# instead of the common level-3 block sizes.
# Note: amd64_legacy is for pre-zen architectures, so it is filtered out
# before the name is matched against zen/amd64/x86_64. The pipeline yields
# the number of matching lines, i.e. 1 on a match and 0 otherwise.
uconf=$(echo ${config_name} | grep -v amd64_legacy |grep -c 'zen\|amd64\|x86_64' | cut -d. -f1)
if [[ "${uconf}" != "1" ]]; then
    enable_aocl_zen='no'
    enable_aocl_zen_01=0;
else
    enable_aocl_zen='yes'
    enable_aocl_zen_01=1
fi
# Build the text of the #define list for the sub-configurations: one
# "#define BLIS_CONFIG_<NAME>" entry per element of config_list, each
# terminated by a literal "\n" that is expanded later by perl.
config_list_defines=""
for conf in ${config_list}; do
    uconf=$(echo ${conf} | tr '[:lower:]' '[:upper:]')
    config_define="BLIS_CONFIG_${uconf}"
    config_list_defines+="#define ${config_define}\n"
done
# Likewise, one "#define BLIS_KERNELS_<NAME>" entry per kernel set in
# kernel_list.
kernel_list_defines=""
for kern in ${kernel_list}; do
    uconf=$(echo ${kern} | tr '[:lower:]' '[:upper:]')
    kernel_define="BLIS_KERNELS_${uconf}"
    kernel_list_defines+="#define ${kernel_define}\n"
done
# And one '#include "<addon>.h"' entry per addon in addon_list.
addon_list_includes=""
for addon in ${addon_list}; do
    addon_header="\"${addon}.h\""
    addon_list_includes+="#include ${addon_header}\n"
done
# -- Determine whether we are performing an out-of-tree build --------------
if [[ "${dist_path}" != "./" ]]; then
    # At this point, we know the user did not run "./configure". But we
    # have not yet ruled out "<fullpath>/configure" or some equivalent
    # that uses relative paths. To tell these cases apart, a dummy file is
    # created in the current build directory.
    touch "./${dummy_file}"
    # If the dummy file also shows up under the source distribution path,
    # the build directory and the source directory are the same place
    # (in-tree build). Otherwise the build is out-of-tree.
    if [[ -f "${dist_path}/${dummy_file}" ]]; then
        configured_oot="no"
    else
        configured_oot="yes"
    fi
    # Remove the dummy file.
    rm -f "./${dummy_file}"
fi
# -- Instantiate config.mk file from template ------------------------------
# Substitute every @placeholder@ of the config_mk_in template with its
# configured value and write the result to config_mk_out. All substitutions
# are handed to one sed invocation, which applies them to each line in the
# order listed.
echo "${script_name}: creating ${config_mk_out_path} from ${config_mk_in_path}"
sed \
    -e "s/@version@/${version_esc}/g" \
    -e "s/@so_version_major@/${so_version_major}/g" \
    -e "s/@so_version_minorbuild@/${so_version_minorbuild}/g" \
    -e "s/@config_name@/${config_name}/g" \
    -e "s/@config_list@/${config_list}/g" \
    -e "s/@kernel_list@/${kernel_list}/g" \
    -e "s/@kconfig_map@/${kconfig_map}/g" \
    -e "s/@os_name@/${os_name_esc}/g" \
    -e "s/@is_win@/${is_win}/g" \
    -e "s/@dist_path@/${dist_path_esc}/g" \
    -e "s/@CC_VENDOR@/${cc_vendor}/g" \
    -e "s/@gcc_older_than_4_9_0@/${gcc_older_than_4_9_0}/g" \
    -e "s/@gcc_older_than_6_1_0@/${gcc_older_than_6_1_0}/g" \
    -e "s/@gcc_older_than_9_1_0@/${gcc_older_than_9_1_0}/g" \
    -e "s/@gcc_older_than_11_2_0@/${gcc_older_than_11_2_0}/g" \
    -e "s/@CC@/${cc_esc}/g" \
    -e "s/@CXX@/${cxx_esc}/g" \
    -e "s/@cc_major@/${cc_major}/g" \
    -e "s/@cc_minor@/${cc_minor}/g" \
    -e "s/@cc_revision@/${cc_revision}/g" \
    -e "s/@RANLIB@/${ranlib_esc}/g" \
    -e "s/@AR@/${ar_esc}/g" \
    -e "s/@PYTHON@/${python_esc}/g" \
    -e "s/@libpthread@/${libpthread_esc}/g" \
    -e "s/@cflags_preset@/${cflags_preset_esc}/g" \
    -e "s/@ldflags_preset@/${ldflags_preset_esc}/g" \
    -e "s/@debug_type@/${debug_type}/g" \
    -e "s/@enable_system@/${enable_system}/g" \
    -e "s/@threading_model@/${threading_model}/g" \
    -e "s/@prefix@/${prefix_esc}/g" \
    -e "s/@exec_prefix@/${exec_prefix_esc}/g" \
    -e "s/@libdir@/${libdir_esc}/g" \
    -e "s/@includedir@/${includedir_esc}/g" \
    -e "s/@sharedir@/${sharedir_esc}/g" \
    -e "s/@enable_verbose@/${enable_verbose}/g" \
    -e "s/@configured_oot@/${configured_oot}/g" \
    -e "s/@enable_arg_max_hack@/${enable_arg_max_hack}/g" \
    -e "s/@enable_static@/${enable_static}/g" \
    -e "s/@enable_shared@/${enable_shared}/g" \
    -e "s/@enable_rpath@/${enable_rpath}/g" \
    -e "s/@export_shared@/${export_shared}/g" \
    -e "s/@enable_blas@/${enable_blas}/g" \
    -e "s/@enable_cblas@/${enable_cblas}/g" \
    -e "s/@enable_memkind@/${enable_memkind}/g" \
    -e "s/@pragma_omp_simd@/${pragma_omp_simd}/g" \
    -e "s/@addon_list@/${addon_list}/g" \
    -e "s/@sandbox@/${sandbox}/g" \
    -e "s/@enable_trsm_preinversion@/${enable_trsm_preinversion}/g" \
    -e "s/@enable_aocl_dynamic@/${enable_aocl_dynamic}/g" \
    -e "s/@enable_security_flags@/${enable_security_flags}/g" \
    -e "s/@complex_return@/${complex_return}/g" \
    -e "s/@blas_int_type_size@/${blas_int_type_size}/g" \
    -e "s/\@enable_aocl_zen\@/${enable_aocl_zen}/g" \
    "${config_mk_in_path}" \
    > "${config_mk_out_path}"
# -- Instantiate bli_config.h file from template ---------------------------
# Substitute every @placeholder@ of the bli_config_h_in template and write
# the result to bli_config_h_out. NOTE: The three multi-line define lists
# are substituted with perl instead of sed because the version of sed used
# on OS X is old and does not handle the '\n' character intuitively, which
# was used when constructing ${config_name_define}, ${config_list_defines},
# and ${kernel_list_defines}. The remaining single-value placeholders are
# handled by one sed invocation, applied in the order listed.
echo "${script_name}: creating ${bli_config_h_out_path} from ${bli_config_h_in_path}"
perl -pe "s/\@config_name_define\@/${config_name_define}/g" \
    < "${bli_config_h_in_path}" \
    | perl -pe "s/\@config_list_defines\@/${config_list_defines}/g" \
    | perl -pe "s/\@kernel_list_defines\@/${kernel_list_defines}/g" \
    | sed \
        -e "s/\@enable_aocl_zen\@/${enable_aocl_zen_01}/g" \
        -e "s/@enable_system@/${enable_system_01}/g" \
        -e "s/@enable_openmp@/${enable_openmp_01}/g" \
        -e "s/@enable_pthreads@/${enable_pthreads_01}/g" \
        -e "s/@enable_jrir_slab@/${enable_jrir_slab_01}/g" \
        -e "s/@enable_jrir_rr@/${enable_jrir_rr_01}/g" \
        -e "s/@enable_pba_pools@/${enable_pba_pools_01}/g" \
        -e "s/@enable_sba_pools@/${enable_sba_pools_01}/g" \
        -e "s/@enable_mem_tracing@/${enable_mem_tracing_01}/g" \
        -e "s/@int_type_size@/${int_type_size}/g" \
        -e "s/@blas_int_type_size@/${blas_int_type_size}/g" \
        -e "s/@enable_blas@/${enable_blas_01}/g" \
        -e "s/@enable_cblas@/${enable_cblas_01}/g" \
        -e "s/@enable_mixed_dt@/${enable_mixed_dt_01}/g" \
        -e "s/@enable_mixed_dt_extra_mem@/${enable_mixed_dt_extra_mem_01}/g" \
        -e "s/@enable_mnk1_matrix@/${enable_mnk1_matrix_01}/g" \
        -e "s/@enable_tiny_matrix@/${enable_tiny_matrix_01}/g" \
        -e "s/@enable_small_matrix@/${enable_small_matrix_01}/g" \
        -e "s/@enable_sup_handling@/${enable_sup_handling_01}/g" \
        -e "s/@enable_small_matrix_trsm@/${enable_small_matrix_trsm_01}/g" \
        -e "s/@enable_trsm_preinversion@/${enable_trsm_preinversion_01}/g" \
        -e "s/@enable_memkind@/${enable_memkind_01}/g" \
        -e "s/@enable_aocl_dynamic@/${enable_aocl_dynamic_01}/g" \
        -e "s/@enable_pragma_omp_simd@/${enable_pragma_omp_simd_01}/g" \
        -e "s/@enable_sandbox@/${enable_sandbox_01}/g" \
        -e "s/@enable_shared@/${enable_shared_01}/g" \
        -e "s/@complex_return_intel@/${complex_return_intel01}/g" \
        -e "s/@disable_blis_arch_type@/${disable_blis_arch_type_01}/g" \
        -e "s/@rename_blis_arch_type@/${rename_blis_arch_type}/g" \
        -e "s/@rename_blis_model_type@/${rename_blis_model_type}/g" \
        -e "s/@enable_aocl_dtl_trace@/${enable_aocl_dtl_trace_01}/g" \
        -e "s/@enable_aocl_dtl_log@/${enable_aocl_dtl_log_01}/g" \
        -e "s/@aocl_dtl_trace_level_number@/${aocl_dtl_trace_level_number}/g" \
    > "${bli_config_h_out_path}"
# -- Instantiate bli_addon.h file from template ----------------------------
# Substitute the placeholders of the bli_addon_h_in template and write the
# result to bli_addon_h_out. NOTE: The include list is substituted with
# perl instead of sed because the version of sed used on OS X is old and
# does not handle the '\n' character intuitively, which was used when
# constructing ${addon_list_includes}.
echo "${script_name}: creating ${bli_addon_h_out_path} from ${bli_addon_h_in_path}"
perl -pe "s/\@addon_list_includes\@/${addon_list_includes}/g" \
    < "${bli_addon_h_in_path}" \
    | sed -e "s/@enable_addons@/${enable_addons_01}/g" \
    > "${bli_addon_h_out_path}"
# -- Create top-level object directories -----------------------------------
# Everything below lives under a per-configuration root,
# ${obj_dirpath}/${config_name}. All directories are created with
# 'mkdir -p', so directories that already exist are left untouched.
# Create obj sub-directories (if they do not already exist).
base_obj_dirpath="${obj_dirpath}/${config_name}"
echo "${script_name}: creating ${base_obj_dirpath}"
mkdir -p ${base_obj_dirpath}
# One object directory per sub-configuration, under the config directory.
obj_config_dirpath="${base_obj_dirpath}/${config_dir}"
mkdir -p ${obj_config_dirpath}
for conf in ${config_list}; do
echo "${script_name}: creating ${obj_config_dirpath}/${conf}"
mkdir -p ${obj_config_dirpath}/${conf}
done
# One object directory per kernel set, under the kernels directory.
obj_kernels_dirpath="${base_obj_dirpath}/${kernels_dir}"
mkdir -p ${obj_kernels_dirpath}
for kern in ${kernel_list}; do
echo "${script_name}: creating ${obj_kernels_dirpath}/${kern}"
mkdir -p ${obj_kernels_dirpath}/${kern}
done
# One reference-kernel object directory per sub-configuration.
obj_refkern_dirpath="${base_obj_dirpath}/${refkern_dir}"
mkdir -p ${obj_refkern_dirpath}
for conf in ${config_list}; do
echo "${script_name}: creating ${obj_refkern_dirpath}/${conf}"
mkdir -p ${obj_refkern_dirpath}/${conf}
done
# Object directory for the AOCL DTL sources.
obj_aocldtl_dirpath="${base_obj_dirpath}/${aocldtl_dir}"
echo "${script_name}: creating ${obj_aocldtl_dirpath}"
mkdir -p ${obj_aocldtl_dirpath}
# Object directory for the framework sources.
obj_frame_dirpath="${base_obj_dirpath}/${frame_dir}"
echo "${script_name}: creating ${obj_frame_dirpath}"
mkdir -p ${obj_frame_dirpath}
# One object directory per addon, only when addons were requested.
if [ -n "${addon_flag}" ]; then
obj_addon_dirpath="${base_obj_dirpath}/${addon_dir}"
for addon in ${addon_list}; do
echo "${script_name}: creating ${obj_addon_dirpath}/${addon}"
mkdir -p ${obj_addon_dirpath}/${addon}
done
fi
# Object directory for the sandbox, only when a sandbox was requested.
if [ -n "${sandbox_flag}" ]; then
obj_sandbox_dirpath="${base_obj_dirpath}/${sandbox_dir}"
echo "${script_name}: creating ${obj_sandbox_dirpath}/${sandbox}"
mkdir -p ${obj_sandbox_dirpath}/${sandbox}
fi
# Object directory for the BLAS test drivers.
obj_blastest_dirpath="${base_obj_dirpath}/${blastest_dir}"
echo "${script_name}: creating ${obj_blastest_dirpath}"
mkdir -p ${obj_blastest_dirpath}
# Object directory for the testsuite.
obj_testsuite_dirpath="${base_obj_dirpath}/${testsuite_dir}"
echo "${script_name}: creating ${obj_testsuite_dirpath}"
mkdir -p ${obj_testsuite_dirpath}
# Create lib directory (if it does not already exist).
base_lib_dirpath="${lib_dirpath}/${config_name}"
echo "${script_name}: creating ${base_lib_dirpath}"
mkdir -p ${base_lib_dirpath}
# Create include directory (if it does not already exist).
base_include_dirpath="${include_dirpath}/${config_name}"
echo "${script_name}: creating ${base_include_dirpath}"
mkdir -p ${base_include_dirpath}
# -- Mirror source directory hierarchies to object directories -------------
# Combine the config_list with the config_name and then remove duplicates.
config_list_plus_name=$(rm_duplicate_words "${config_list} ${config_name}")
# Mirror each of the sub-configuration directories to the object directory.
for conf in ${config_list_plus_name}; do
echo "${script_name}: mirroring ${config_dirpath}/${conf} to ${obj_config_dirpath}/${conf}"
${mirror_tree_sh} "${config_dirpath}/${conf}" "${obj_config_dirpath}/${conf}"
done
# Mirror optimized kernels source tree to its object sub-directory.
# We perform the mirroring on each configuration/kernel sub-directory
# within 'kernels'.
for kern in ${kernel_list}; do
# Only mirror the optimized kernels source directory if it exists.
# There are occasions where one of the sub-configurations in the
# config_list does not correspond to a kernels sub-directory, such
# as when architecture B is so close to architecture A that B can
# use A's kernel source code unmodified (though perhaps with
# different blocksizes).
#if [ -d "${kernels_dirpath}/${conf}" ]; then
echo "${script_name}: mirroring ${kernels_dirpath}/${kern} to ${obj_kernels_dirpath}/${kern}"
${mirror_tree_sh} "${kernels_dirpath}/${kern}" "${obj_kernels_dirpath}/${kern}"
#else
# echo "${script_name}: mirroring ${kernels_dirpath}/${conf} skipped... directory does not exist"
#fi
done
# Mirror reference kernel source tree to its object sub-directory.
echo "${script_name}: mirroring ${refkern_dirpath} to ${obj_refkern_dirpath}"
${mirror_tree_sh} ${refkern_dirpath} ${obj_refkern_dirpath}
# Mirror the reference kernel source tree into a separate object
# sub-directory for each sub-configuration in the configuration list.
for conf in ${config_list}; do
echo "${script_name}: mirroring ${refkern_dirpath} to ${obj_refkern_dirpath}/${conf}"
${mirror_tree_sh} "${refkern_dirpath}" "${obj_refkern_dirpath}/${conf}"
done
# Mirror framework source tree to its object sub-directory.
echo "${script_name}: mirroring ${frame_dirpath} to ${obj_frame_dirpath}"
${mirror_tree_sh} ${frame_dirpath} ${obj_frame_dirpath}
# Mirror the DTL source tree to its object sub-directory.
echo "${script_name}: mirroring ${aocldtl_dirpath} to ${obj_aocldtl_dirpath}"
${mirror_tree_sh} ${aocldtl_dirpath} ${obj_aocldtl_dirpath}
# Mirror the chosen addon source tree to its object sub-directory.
if [ -n "${addon_flag}" ]; then
for addon in ${addon_list}; do
echo "${script_name}: mirroring ${addon_dirpath}/${addon} to ${obj_addon_dirpath}/${addon}"
${mirror_tree_sh} "${addon_dirpath}/${addon}" "${obj_addon_dirpath}/${addon}"
done
fi
# Mirror the chosen sandbox source tree to its object sub-directory.
if [ -n "${sandbox_flag}" ]; then
echo "${script_name}: mirroring ${sandbox_dirpath}/${sandbox} to ${obj_sandbox_dirpath}/${sandbox}"
${mirror_tree_sh} "${sandbox_dirpath}/${sandbox}" "${obj_sandbox_dirpath}/${sandbox}"
fi
# -- Generate makefile fragments -------------------------------------------
clist_contains_cname=$(is_in_list "${config_name}" "${config_list}")
# If the config_list does not already contain the config_name (i.e.,
# if config_name is an umbrella family), generate makefiles in that
# directory. (In the next step, we will loop over the actual sub-
# configurations and create fragments there as well.)
if [ "${clist_contains_cname}" == "false" ]; then
echo "${script_name}: creating makefile fragments in ${obj_config_dirpath}/${config_name}"
${gen_make_frags_sh} \
-h -r -v0 \
-o ${script_name} \
-p 'CONFIG' \
${config_dirpath}/${config_name} \
${obj_config_dirpath}/${config_name} \
${gen_make_frags_dirpath}/fragment.mk \
${gen_make_frags_dirpath}/suffix_list \
${gen_make_frags_dirpath}/ignore_list
fi
# Generate makefile fragments for each of the sub-configurations present
# in the configuration list.
for conf in ${config_list}; do
echo "${script_name}: creating makefile fragments in ${obj_config_dirpath}/${conf}"
${gen_make_frags_sh} \
-h -r -v0 \
-o ${script_name} \
-p 'CONFIG' \
${config_dirpath}/${conf} \
${obj_config_dirpath}/${conf} \
${gen_make_frags_dirpath}/fragment.mk \
${gen_make_frags_dirpath}/suffix_list \
${gen_make_frags_dirpath}/ignore_list
done
# Generate makefile fragments for each of the kernel sets required by
# the configuration list (in the kernel list).
for kern in ${kernel_list}; do
echo "${script_name}: creating makefile fragments in ${obj_kernels_dirpath}/${kern}"
${gen_make_frags_sh} \
-h -r -v0 \
-o ${script_name} \
-p 'KERNELS' \
${kernels_dirpath}/${kern} \
${obj_kernels_dirpath}/${kern} \
${gen_make_frags_dirpath}/fragment.mk \
${gen_make_frags_dirpath}/suffix_list \
${gen_make_frags_dirpath}/ignore_list
done
# Generate makefile fragments in the reference kernels directory.
echo "${script_name}: creating makefile fragments in ${obj_refkern_dirpath}"
${gen_make_frags_sh} \
-h -r -v0 \
-o ${script_name} \
-p 'REFKERN' \
${refkern_dirpath} \
${obj_refkern_dirpath} \
${gen_make_frags_dirpath}/fragment.mk \
${gen_make_frags_dirpath}/suffix_list \
${gen_make_frags_dirpath}/ignore_list
# Generate makefile fragments in the DTL directory.
echo "${script_name}: creating makefile fragments in ${obj_aocldtl_dirpath}"
${gen_make_frags_sh} \
-h -r -v0 \
-o ${script_name} \
-p 'AOCLDTL' \
${aocldtl_dirpath} \
${obj_aocldtl_dirpath} \
${gen_make_frags_dirpath}/fragment.mk \
${gen_make_frags_dirpath}/suffix_list \
${gen_make_frags_dirpath}/ignore_list
# Generate makefile fragments in the framework directory.
echo "${script_name}: creating makefile fragments in ${obj_frame_dirpath}"
${gen_make_frags_sh} \
-h -r -v0 \
-o ${script_name} \
-p 'FRAME' \
${frame_dirpath} \
${obj_frame_dirpath} \
${gen_make_frags_dirpath}/fragment.mk \
${gen_make_frags_dirpath}/suffix_list \
${gen_make_frags_dirpath}/ignore_list
# Generate makefile fragments in the addon sub-directory.
if [ -n "${addon_flag}" ]; then
for addon in ${addon_list}; do
echo "${script_name}: creating makefile fragments in ${obj_addon_dirpath}/${addon}"
${gen_make_frags_sh} \
-h -r -v0 \
-o ${script_name} \
-p 'ADDON' \
${addon_dirpath}/${addon} \
${obj_addon_dirpath}/${addon} \
${gen_make_frags_dirpath}/fragment.mk \
${gen_make_frags_dirpath}/suffix_list \
${gen_make_frags_dirpath}/ignore_list
done
fi
# Generate makefile fragments in the sandbox sub-directory.
if [ -n "${sandbox_flag}" ]; then
echo "${script_name}: creating makefile fragments in ${obj_sandbox_dirpath}/${sandbox}"
${gen_make_frags_sh} \
-h -r -v0 \
-o ${script_name} \
-p 'SANDBOX' \
${sandbox_dirpath}/${sandbox} \
${obj_sandbox_dirpath}/${sandbox} \
${gen_make_frags_dirpath}/fragment.mk \
${gen_make_frags_dirpath}/suffix_list \
${gen_make_frags_dirpath}/ignore_list
fi
# -- Handle out-of-tree builds ---------------------------------------------
# Under some circumstances, we need to create some symbolic links to
# properly handle out-of-tree builds.
if [ "${configured_oot}" = "yes" ]; then
# If 'Makefile' symlink does not already exist in the current
# directory, create a symbolic link to it. If one does exist, we
# use -f to force creation of a new link.
if [ ! -e "./Makefile" ]; then
echo "${script_name}: creating symbolic link to Makefile."
ln -s "${dist_path}/Makefile"
elif [ -h "./Makefile" ]; then
echo "${script_name}: symbolic link to Makefile already exists; forcing creation of new link."
ln -sf "${dist_path}/Makefile"
else
echo "${script_name}: Non-symbolic link file or directory 'Makefile' blocks creation of symlink."
echo "${script_name}: *** Please remove this entity and re-run configure."
exit 1
fi
# If 'blis.pc.in' symlink does not already exist in the current
# directory, create a symbolic link to it. If one does exist, we
# use -f to force creation of a new link.
if [ ! -e "./blis.pc.in" ]; then
echo "${script_name}: creating symbolic link to blis.pc.in."
ln -s "${dist_path}/blis.pc.in"
elif [ -h "./blis.pc.in" ]; then
echo "${script_name}: symbolic link to blis.pc.in already exists; forcing creation of new link."
ln -sf "${dist_path}/blis.pc.in"
else
echo "${script_name}: Non-symbolic link file or directory 'blis.pc.in' blocks creation of symlink."
echo "${script_name}: *** Please remove this entity and re-run configure."
exit 1
fi
# If 'common.mk' symlink does not already exist in the current
# directory, create a symbolic link to it. If one does exist, we
# use -f to force creation of a new link.
if [ ! -e "./common.mk" ]; then
echo "${script_name}: creating symbolic link to common.mk."
ln -s "${dist_path}/common.mk"
elif [ -h "./common.mk" ]; then
echo "${script_name}: symbolic link to common.mk already exists; forcing creation of new link."
ln -sf "${dist_path}/common.mk"
else
echo "${script_name}: Non-symbolic link file or directory 'common.mk' blocks creation of symlink."
echo "${script_name}: *** Please remove this entity and re-run configure."
exit 1
fi
# If 'config' symlink does not already exist in the current
# directory, create a symbolic link to it. If one does exist, we
# use -f to force creation of a new link.
if [ ! -e "./config" ]; then
echo "${script_name}: creating symbolic link to 'config' directory."
ln -s "${dist_path}/config"
elif [ -h "./config" ]; then
echo "${script_name}: symbolic link to 'config' directory already exists; forcing creation of new link."
ln -sf "${dist_path}/config"
else
echo "${script_name}: Non-symbolic link file or directory 'config' blocks creation of symlink."
echo "${script_name}: *** Please remove this entity and re-run configure."
exit 1
fi
echo "${script_name}: configured to build outside of source distribution."
else
echo "${script_name}: configured to build within top-level directory of source distribution."
fi
if [ "${warn_user_generic}" = "1" ]; then
echo "${script_name}: "
echo "${script_name}: *** Unable to automatically detect hardware type! ***"
echo "${script_name}: "
echo "${script_name}: NOTE: configure was unable to identify a subconfiguration"
echo "${script_name}: optimized for your hardware. As a result, the 'generic'"
echo "${script_name}: subconfiguration (with low-performance reference kernels)"
echo "${script_name}: will be used. For support, please open an issue on GitHub"
echo "${script_name}: at https://github.com/flame/blis/issues."
echo "${script_name}: "
fi
# Exit peacefully.
return 0
}
# The script's main entry point: invoke main() with every command-line
# argument forwarded unchanged ("$@" keeps each argument as its own word).
main "$@"