mirror of
https://github.com/amd/blis.git
synced 2026-03-23 10:47:21 +00:00
- The current build systems have the following behaviour
with regards to building "aocl_gemm" addon codebase(LPGEMM)
when giving "amdzen" as the target architecture(fat-binary)
- Make: Attempts to compile LPGEMM kernels using the same
compiler flags that the makefile fragments set for BLIS
kernels, based on the compiler version.
- CMake: With presets, it always enables the addon compilation
unless explicitly specified with the ENABLE_ADDON variable.
- This poses a bug with older compilers, owing to them not supporting
BF16 or INT8 intrinsic compilation.
- This patch adds the functionality to check for GCC and Clang compiler versions,
and disables LPGEMM compilation if GCC < 11.2 or Clang < 12.0.
- Make: Updated the configure script to check for the compiler version
if the addon is specified.
- CMake: Updated the main CMakeLists.txt to check for the compiler version
if the addon is specified, and to also force-update the associated
cache variable. Also updated kernels/CMakeLists.txt to
check if "aocl_gemm" remains in the ENABLE_ADDONS list after
all the checks in the previous layers.
AMD-Internal: [CPUPL-7850]
Signed-off-by: Vignesh Balasubramanian <Vignesh.Balasubramanian@amd.com>
4208 lines
141 KiB
Bash
Executable File
4208 lines
141 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
#
|
|
# BLIS
|
|
# An object-based framework for developing high-performance BLAS-like
|
|
# libraries.
|
|
#
|
|
# Copyright (C) 2014, The University of Texas at Austin
|
|
# Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are
|
|
# met:
|
|
# - Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# - Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in the
|
|
# documentation and/or other materials provided with the distribution.
|
|
# - Neither the name(s) of the copyright holder(s) nor the names of its
|
|
# contributors may be used to endorse or promote products derived
|
|
# from this software without specific prior written permission.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
#
|
|
#
|
|
|
|
#
|
|
# -- Helper functions ----------------------------------------------------------
|
|
#
|
|
|
|
print_usage()
{
    # Print the complete command-line help text to stdout and terminate the
    # script with a non-zero exit status.
    #
    # Globals read:
    #   version           - version string; if empty it is loaded from the
    #                       file named by version_filepath (and, as before,
    #                       stored back into the global 'version').
    #   version_filepath  - path of the 'version' file (used only as fallback).
    #   script_name       - name of this script, shown in the usage line.
    #   prefix_def        - default installation prefix, shown under --prefix.
    #
    # This function never returns to its caller: it always calls 'exit 1'.

    # Use the version string in the 'version' file since we don't have
    # the patched version string yet.
    if [ -z "${version}" ]; then
        version=$(cat "${version_filepath}")
    fi

    # Echo usage info.
    echo " "
    echo " ${script_name} (BLIS ${version})"
    #echo " "
    #echo " BLIS ${version}"
    echo " "
    echo " Configure BLIS's build system for compilation using a specified"
    echo " configuration directory."
    echo " "
    echo " Usage:"
    echo " "
    echo " ${script_name} [options] [env. vars.] confname"
    echo " "
    echo " Arguments:"
    echo " "
    echo " confname The name of the sub-directory inside of the 'config'"
    echo " directory containing the desired BLIS configuration."
    echo " Note that confname MUST be specified; if it is not,"
    echo " configure will complain. To build a completely generic"
    echo " implementation, use the 'generic' configuration"
    echo " "
    echo " Options:"
    echo " "
    echo " -p PREFIX, --prefix=PREFIX"
    echo " "
    echo " The common installation prefix for all files. If given,"
    echo " this option effectively implies:"
    echo " --libdir=EXECPREFIX/lib"
    echo " --includedir=PREFIX/include"
    echo " --sharedir=PREFIX/share"
    echo " where EXECPREFIX defaults to PREFIX. If this option is"
    echo " not given, PREFIX defaults to '${prefix_def}'. If PREFIX"
    echo " refers to a directory that does not exist, it will be"
    echo " created."
    echo " "
    echo " --exec-prefix=EXECPREFIX"
    echo " "
    echo " The installation prefix for libraries. Specifically, if"
    echo " given, this option effectively implies:"
    echo " --libdir=EXECPREFIX/lib"
    echo " If not given, EXECPREFIX defaults to PREFIX, which may be"
    echo " modified by the --prefix option. If EXECPREFIX refers to"
    echo " a directory that does not exist, it will be created."
    echo " "
    echo " --libdir=LIBDIR"
    echo " "
    echo " The path to which make will install libraries. If not"
    echo " given, LIBDIR defaults to PREFIX/lib. If LIBDIR refers to"
    echo " a directory that does not exist, it will be created."
    echo " "
    echo " --includedir=INCDIR"
    echo " "
    echo " The path to which make will install development header"
    echo " files. If not given, INCDIR defaults to PREFIX/include."
    echo " If INCDIR refers to a directory that does not exist, it"
    echo " will be created."
    echo " "
    echo " --sharedir=SHAREDIR"
    echo " "
    echo " The path to which make will install makefile fragments containing"
    echo " make variables determined by configure (e.g. CC, CFLAGS,"
    echo " and LDFLAGS). These files allow certain BLIS makefiles,"
    echo " such as those in the examples or testsuite directories, to"
    echo " operate on an installed copy of BLIS rather than a local"
    echo " (and possibly uninstalled) copy. If not given, SHAREDIR"
    echo " defaults to PREFIX/share. If SHAREDIR refers to a"
    echo " directory that does not exist, it will be created."
    echo " "
    echo " --enable-verbose-make, --disable-verbose-make"
    echo " "
    echo " Enable (disabled by default) verbose compilation output"
    echo " during make."
    echo " "
    echo " --enable-arg-max-hack --disable-arg-max-hack"
    echo " "
    echo " Enable (disabled by default) build system logic that"
    echo " will allow archiving/linking the static/shared library"
    echo " even if the command plus command line arguments exceeds"
    echo " the operating system limit (ARG_MAX)."
    echo " "
    echo " -d DEBUG, --enable-debug[=DEBUG]"
    echo " "
    echo " Enable debugging symbols in the library. If argument"
    echo " DEBUG is given as 'opt', then optimization flags are"
    echo " kept in the framework, otherwise optimization is"
    echo " turned off."
    echo " "
    echo " --disable-static, --enable-static"
    echo " "
    echo " Disable (enabled by default) building BLIS as a static"
    echo " library. If the static library build is disabled, the"
    echo " shared library build must remain enabled."
    echo " "
    echo " --disable-shared, --enable-shared"
    echo " "
    echo " Disable (enabled by default) building BLIS as a shared"
    echo " library. If the shared library build is disabled, the"
    echo " static library build must remain enabled."
    echo " "
    echo " --enable-rpath, --disable-rpath"
    echo " "
    echo " Enable (disabled by default) setting an install_name for"
    echo " dynamic libraries on macOS which starts with @rpath rather"
    echo " than the absolute install path."
    echo " "
    echo " -e SYMBOLS, --export-shared[=SYMBOLS]"
    echo " "
    echo " Specify the subset of library symbols that are exported"
    echo " within a shared library. Valid values for SYMBOLS are:"
    echo " 'public' (the default) and 'all'. By default, only"
    echo " functions and variables that belong to public APIs are"
    echo " exported in shared libraries. However, the user may"
    echo " instead export all symbols in BLIS, even those that were"
    echo " intended for internal use only. Note that the public APIs"
    echo " encompass all functions that almost any user would ever"
    echo " want to call, including the BLAS/CBLAS compatibility APIs"
    echo " as well as the basic and expert interfaces to the typed"
    echo " and object APIs that are unique to BLIS. Also note that"
    echo " changing this option to 'all' will have no effect in some"
    echo " environments, such as when compiling with clang on"
    echo " Windows."
    echo " "
    echo " -t MODEL, --enable-threading[=MODEL], --disable-threading"
    echo " "
    echo " Enable threading in the library, using threading model"
    echo " MODEL={openmp,pthreads,no}. If MODEL=no or "
    echo " --disable-threading is specified, threading will be"
    echo " disabled. The default is 'no'."
    echo " "
    echo " --enable-system, --disable-system"
    echo " "
    echo " Enable conventional operating system support, such as"
    echo " pthreads for thread-safety. The default state is enabled."
    echo " However, in rare circumstances you may wish to configure"
    echo " BLIS for use with a minimal or nonexistent operating"
    echo " system (e.g. hardware simulators). In these situations,"
    echo " --disable-system may be used to jettison all compile-time"
    echo " and link-time dependencies outside of the standard C"
    echo " library. When disabled, this option also forces the use"
    echo " of --disable-threading."
    echo " "
    echo " --disable-pba-pools, --enable-pba-pools"
    echo " --disable-sba-pools, --enable-sba-pools"
    echo " "
    echo " Disable (enabled by default) use of internal memory pools"
    echo " within the packing block allocator (pba) and/or the small"
    echo " block allocator (sba). The former is used to allocate"
    echo " memory used to pack submatrices while the latter is used"
    echo " to allocate control/thread tree nodes and thread"
    echo " communicators. Both allocations take place in the context"
    echo " of level-3 operations. When the pba is disabled, the"
    echo " malloc()-like function specified by BLIS_MALLOC_POOL is"
    echo " called on-demand whenever a packing block is needed, and"
    echo " when the sba is disabled, the malloc()-like function"
    echo " specified by BLIS_MALLOC_INTL is called whenever a small"
    echo " block is needed, with the two allocators calling free()-"
    echo " like functions BLIS_FREE_POOL and BLIS_FREE_INTL,"
    echo " respectively when blocks are released. When enabled,"
    echo " either or both pools are populated via the same functions"
    echo " mentioned previously, and henceforth blocks are checked"
    echo " out and in. The library quickly reaches a state in which"
    echo " it no longer needs to call malloc() or free(), even"
    echo " across many separate level-3 operation invocations."
    echo " "
    echo " --enable-mem-tracing, --disable-mem-tracing"
    echo " "
    echo " Enable (disabled by default) output to stdout that traces"
    echo " the allocation and freeing of memory, including the names"
    echo " of the functions that triggered the allocation/freeing."
    echo " Enabling this option WILL NEGATIVELY IMPACT PERFORMANCE."
    echo " Please use only for informational/debugging purposes."
    echo " "
    echo " -i SIZE, --int-size=SIZE"
    echo " "
    echo " Set the size (in bits) of internal BLIS integers and"
    echo " integer types used in native BLIS interfaces. The"
    echo " default integer type size is architecture dependent."
    echo " (Hint: You can always find this value printed at the"
    echo " beginning of the testsuite output.)"
    echo " "
    echo " -b SIZE, --blas-int-size=SIZE"
    echo " "
    echo " Set the size (in bits) of integer types in external"
    echo " BLAS and CBLAS interfaces, if enabled. The default"
    echo " integer type size used in BLAS/CBLAS is 32 bits."
    echo " "
    echo " --disable-blas, --enable-blas"
    echo " "
    echo " Disable (enabled by default) building the BLAS"
    echo " compatibility layer."
    echo " "
    echo " --enable-cblas, --disable-cblas"
    echo " "
    echo " Enable (disabled by default) building the CBLAS"
    echo " compatibility layer. This automatically enables the"
    echo " BLAS compatibility layer as well."
    echo " "
    echo " --disable-mixed-dt, --enable-mixed-dt"
    echo " "
    echo " Disable (enabled by default) support for mixing the"
    echo " storage domain and/or storage precision of matrix"
    echo " operands for the gemm operation, as well as support"
    echo " for computing in a precision different from one or"
    echo " both of matrices A and B."
    echo " "
    echo " --disable-mixed-dt-extra-mem, --enable-mixed-dt-extra-mem"
    echo " "
    echo " Disable (enabled by default) support for additional"
    echo " mixed datatype optimizations that require temporarily"
    echo " allocating extra memory--specifically, a single m x n"
    echo " matrix (per application thread) whose storage datatype"
    echo " is equal to the computation datatype. This option may"
    echo " only be enabled when mixed domain/precision support is"
    echo " enabled."
    echo " "
    echo " --disable-mnk1-matrix, --enable-mnk1-matrix"
    echo " "
    echo " Disable (enabled by default) handling of matrix problem"
    echo " where M, N or K = 1 via separate code branches. When disabled,"
    echo " these operations will be performed by gemm rather than gemv"
    echo " or other optimized implementations."
    echo " "
    echo " --disable-tiny-matrix, --enable-tiny-matrix"
    echo " "
    echo " Disable (enabled by default) handling of tiny"
    echo " matrix problems via tiny code branches. When disabled,"
    echo " these tiny level-3 operations will be performed by"
    echo " the conventional implementation, which is optimized for"
    echo " medium and large problems. Note that what qualifies as"
    echo " \"tiny\" depends on thresholds that may vary by sub-"
    echo " configuration."
    echo " Currently only of relevance on configs that include"
    echo " AMD Zen sub-configs"
    echo " "
    echo " --disable-small-matrix, --enable-small-matrix"
    echo " "
    echo " Disable (enabled by default) handling of small/skinny"
    echo " matrix problems via small code branches. When disabled,"
    echo " these small/skinny level-3 operations will be performed by"
    echo " the conventional implementation, which is optimized for"
    echo " medium and large problems. Note that what qualifies as"
    echo " \"small\" depends on thresholds that may vary by sub-"
    echo " configuration."
    echo " Currently only of relevance on configs that include"
    echo " AMD Zen sub-configs"
    echo " "
    echo " --disable-sup-handling, --enable-sup-handling"
    echo " "
    echo " Disable (enabled by default) handling of small/skinny"
    echo " matrix problems via SUP code branches. When disabled,"
    echo " these small/skinny level-3 operations will be performed by"
    echo " the conventional implementation, which is optimized for"
    echo " medium and large problems. Note that what qualifies as"
    echo " \"SUP\" depends on thresholds that may vary by sub-"
    echo " configuration."
    echo " "
    echo " --disable-small-matrix-trsm, --enable-small-matrix-trsm"
    echo " "
    echo " Disable (enabled by default) handling of small/skinny"
    echo " TRSM problems via small code branches. When disabled,"
    echo " these small/skinny level-3 operations will be performed by"
    echo " the conventional implementation, which is optimized for"
    echo " medium and large problems. Note that what qualifies as"
    echo " \"small\" depends on thresholds that may vary by sub-"
    echo " configuration."
    echo " Currently only of relevance on configs that include"
    echo " AMD Zen sub-configs"
    echo " "
    echo " -a NAME --enable-addon=NAME"
    echo " "
    echo " Enable the code provided by an addon. An addon consists"
    echo " of a separate directory of code that provides additional"
    echo " APIs, implementations, and/or operations that would"
    echo " otherwise not be present within a build of BLIS. This"
    echo " option may be used multiple times to specify the inclusion"
    echo " of multiple addons. By default, no addons are enabled."
    echo " "
    echo " -s NAME --enable-sandbox=NAME"
    echo " "
    echo " Enable a separate sandbox implementation of gemm. This"
    echo " option disables BLIS's conventional gemm implementation"
    echo " (which shares common infrastructure with other level-3"
    echo " operations) and instead compiles and uses the code in"
    echo " the NAME directory, which is expected to be a sub-"
    echo " directory of 'sandbox'. By default, no sandboxes are"
    echo " enabled."
    echo " "
    echo " --with-memkind, --without-memkind"
    echo " "
    echo " Forcibly enable or disable the use of libmemkind's"
    echo " hbw_malloc() and hbw_free() as substitutes for malloc()"
    echo " and free(), respectively, when allocating memory for"
    echo " BLIS's memory pools, which are used to manage buffers"
    echo " into which matrices are packed. The default behavior"
    echo " for this option is environment-dependent; if configure"
    echo " detects the presence of libmemkind, libmemkind is used"
    echo " by default, and otherwise it is not used by default."
    echo " "
    echo " -r METHOD, --thread-part-jrir=METHOD"
    echo " "
    echo " Request a method of assigning micropanels to threads in"
    echo " the JR and IR loops. Valid values for METHOD are 'slab'"
    echo " and 'rr'. Using 'slab' assigns (as much as possible)"
    echo " contiguous regions of micropanels to each thread while"
    echo " using 'rr' assigns micropanels to threads in a round-"
    echo " robin fashion. The chosen method also applies during"
    echo " the packing of A and B. The default method is 'slab'."
    echo " NOTE: Specifying this option constitutes a request,"
    echo " which may be ignored in select situations if the"
    echo " implementation has a good reason to do so."
    echo " "
    echo " --disable-trsm-preinversion, --enable-trsm-preinversion"
    echo " "
    echo " Disable (enabled by default) pre-inversion of triangular"
    echo " matrix diagonals when performing trsm. When pre-inversion"
    echo " is enabled, diagonal elements are inverted outside of the"
    echo " microkernel (e.g. during packing) so that the microkernel"
    echo " can use multiply instructions. When disabled, division"
    echo " instructions are used within the microkernel. Executing"
    echo " these division instructions within the microkernel will"
    echo " incur a performance penalty, but numerical robustness will"
    echo " improve for certain cases involving denormal numbers that"
    echo " would otherwise result in overflow in the pre-inverted"
    echo " values."
    echo " "
    echo " --force-version=STRING"
    echo " "
    echo " Force configure to use an arbitrary version string"
    echo " STRING. This option may be useful when repackaging"
    echo " custom versions of BLIS by outside organizations."
    echo " "
    echo " -c, --show-config-lists"
    echo " "
    echo " Print the config and kernel lists, and kernel-to-config"
    echo " map after they are read from file. This can be useful"
    echo " when debugging certain configuration issues, and/or as"
    echo " a sanity check to make sure these lists are constituted"
    echo " as expected."
    echo " "
    echo " --complex-return=gnu|intel"
    echo " "
    echo " Specify the way in which complex numbers are returned"
    echo " from Fortran functions, either \"gnu\" (return in"
    echo " registers) or \"intel\" (return via hidden argument)."
    echo " If not specified and the environment variable FC is set,"
    echo " attempt to determine the return type from the compiler."
    echo " Otherwise, the default is \"gnu\"."
    echo " "
    echo " --enable-aocl-dynamic, --disable-aocl-dynamic"
    echo " "
    echo " Disable (Enabled by default) dynamic selection of number of"
    echo " threads used to solve the given problem."
    echo " Range of optimum number of threads will be [1, num_threads],"
    echo " where \"num_threads\" is number of threads set by the application."
    echo " Num_threads is derived from either environment variable"
    echo " OMP_NUM_THREADS or BLIS_NUM_THREADS or bli_set_num_threads() API."
    echo " "
    echo " --enable-security-flags, --disable-security-flags"
    echo " "
    echo " Disable (Enabled by default) addition of compiler and linker"
    echo " security hardening flags (e.g. -D_FORTIFY_SOURCE=2 -fstack-protector-strong"
    echo " and -Wl,-z,relro -Wl,-z,now on ELF platforms)."
    echo " "
    echo " --enable-blis-arch-type, --disable-blis-arch-type"
    echo " "
    echo " Disable support for AOCL_ENABLE_INSTRUCTIONS, BLIS_ARCH_TYPE and"
    echo " BLIS_MODEL_TYPE environment variables, which allows user to select"
    echo " architecture specific code path and optimizations at runtime."
    echo " If disabled, in builds with multiple code paths, BLIS"
    echo " will still select path and optimizations automatically."
    echo " Default: Enabled in builds with multiple code paths, else disabled."
    echo " "
    echo " --rename-blis-arch-type=STRING"
    echo " "
    echo " Change environment variable used to select architecture specific"
    echo " code path from BLIS_ARCH_TYPE to STRING"
    echo " "
    echo " --rename-blis-model-type=STRING"
    echo " "
    echo " Change environment variable used to select architecture model specific"
    echo " optimizations from BLIS_MODEL_TYPE to STRING"
    echo " "
    echo " --enable-aocl-dtl=OPTION, --disable-aocl-dtl"
    echo " "
    echo " Enable DTL tracing and/or logging functionality"
    echo " OPTION={all,trace,log,off}. The default is 'off'."
    echo " Unrecognized options will be treated as 'off'."
    echo " Details of the options:"
    echo " * logging records basic information for each BLAS"
    echo " call, with some APIs including timing information."
    echo " * tracing records more detailed information on"
    echo " the call stack within the BLAS APIs, and is mostly"
    echo " of use for BLIS developers. The level of detail is"
    echo " controlled by --aocl-dtl-trace-level option. More detailed"
    echo " tracing will significantly increase API runtime."
    echo " "
    echo " --aocl-dtl-trace-level=OPTION"
    echo " "
    echo " Sets the level of detail in tracing, see the description"
    echo " in aocl_dtl/aocldtlcf.h for more details."
    echo " OPTION=1..10, used to set different levels of detail in"
    echo " tracing. Default value is 5."
    echo " "
    echo " -q, --quiet Suppress informational output. By default, configure"
    echo " is verbose. (NOTE: -q is not yet implemented)"
    echo " "
    echo " -h, --help Output this information and quit."
    echo " "
    echo " Environment Variables:"
    echo " "
    echo " CC Specifies the C compiler to use."
    echo " CXX Specifies the C++ compiler to use (sandbox only)."
    echo " FC Specifies the Fortran compiler to use (only to determine --complex-return)."
    echo " RANLIB Specifies the ranlib executable to use."
    echo " AR Specifies the archiver to use."
    echo " CFLAGS Specifies additional compiler flags to use (prepended)."
    echo " LDFLAGS Specifies additional linker flags to use (prepended)."
    echo " LIBPTHREAD Pthreads library to use."
    echo " PYTHON Specifies the python interpreter to use."
    echo " "
    echo " Environment variables may also be specified as command line"
    echo " options, e.g.:"
    echo " "
    echo " ./configure [options] CC=gcc haswell"
    echo " "
    echo " Note that not all compilers are compatible with a given"
    echo " configuration."
    echo " "

    # Exit with non-zero exit status
    exit 1
}
|
|
|
|
query_array()
{
    # Look up one entry of an emulated associative array and print it.
    #
    # The "array" is a family of plain shell variables named <array>_<key>
    # (the naming used by assign_key_value).
    #
    # Arguments:
    #   $1 - array name (prefix of the backing variables)
    #   $2 - key
    # Outputs:
    #   The stored value followed by a newline; an empty line if the
    #   backing variable is unset.
    #
    # The local is deliberately named without an underscore: every backing
    # variable name contains one, so the local can never shadow the variable
    # being looked up (a local called 'var_name' would break the lookup of
    # array 'var', key 'name').
    local ref

    ref="${1}_${2}"

    # printf rather than echo, so values such as "-n" or "-e" are printed
    # verbatim instead of being taken as echo options.
    printf '%s\n' "${!ref}"
}
|
|
|
|
assign_key_value()
{
    # Store a value in an emulated associative array.
    #
    # The entry is kept in an ordinary shell variable whose name is the
    # array name and the key joined by an underscore.
    #
    # Arguments:
    #   $1 - array name
    #   $2 - key
    #   $3 - value to store
    local slot

    slot="${1}_${2}"

    # printf -v writes straight into the named variable, so no eval is needed.
    printf -v "${slot}" %s "${3}"
}
|
|
|
|
#
|
|
# FGVZ: This commented-out function is being kept as an example of how
|
|
# to effectively "pass by reference" in bash. That is, pass the name of
|
|
# a variable, instead of its contents, and then let the function use the
|
|
# variable by prepending a $, at which time it can evaluate the string
|
|
# as if it were a literal variable occurrence.
|
|
#
|
|
#filteradd_to_list()
|
|
#{
|
|
# local dlist ditem list_c item_c is_blacklisted
|
|
#
|
|
# # Add $1 to the list identified by $2, but only if $1 is not
|
|
# # found in a blacklist.
|
|
#
|
|
# # Note: $2 can actually be a list of items.
|
|
# dlist=\$"$1"
|
|
# ditem=\$"$2"
|
|
#
|
|
# # Acquire the contents of $list and $item and store them in list_c
|
|
# # and item_c, respectively.
|
|
# list_c=$(eval "expr \"$dlist\" ")
|
|
# item_c=$(eval "expr \"$ditem\" ")
|
|
#
|
|
# # Iterate over $item_c in case it is actually multiple items.
|
|
# for cur_item in $item_c; do
|
|
#
|
|
# is_blacklisted=$(is_in_list "${cur_item}" "${config_blist}")
|
|
# if [ ${is_blacklisted} == "false" ]; then
|
|
#
|
|
# # If cur_item is not blacklisted, add it to list_c.
|
|
# list_c="${list_c} ${cur_item}"
|
|
# fi
|
|
# done
|
|
#
|
|
# # Update the argument.
|
|
# eval "$1=\"${list_c}\""
|
|
#}
|
|
|
|
pass_config_kernel_registries()
{
    # Make one pass over a configuration registry file.
    #
    # Arguments:
    #   $1 - the file containing the configuration registry.
    #   $2 - the pass number: 0 or 1. Pass 0 builds the indirect config
    #        blacklist (indirect_blist) ONLY. Pass 1 actually populates the
    #        config and kernel registries, and assumes the indirect_blist
    #        has already been created.
    #
    # Globals:
    #   config_blist    (read)    - space-separated blacklisted configs.
    #   indirect_blist  (written) - reset on pass 0; read on pass 1.
    #   config_registry_<name>, kernel_registry_<name> (written on pass 1
    #                   through assign_key_value).
    #
    # Each meaningful registry line has the form
    #
    #   cname: item [item ...]
    #
    # where an item is either a plain name, or a slash-compound word
    # 'config/kernel[/kernel...]' when the kernel sets needed by a
    # configuration differ from the configuration's own name.
    local filename passnum
    local all_blist
    local line curline list item config kernels
    local cname clist klist

    filename="$1"
    passnum="$2"

    # Initialize a list of indirect blacklisted configurations for the
    # current iteration. These are configurations that are invalidated by
    # the removal of blacklisted configurations. For example, if haswell
    # is registered as needing the 'haswell' and 'zen' kernel sets:
    #
    #   haswell: haswell/haswell/zen
    #
    # and 'zen' was blacklisted because of the compiler version, then the
    # 'haswell' configuration must be omitted from the registry, as it no
    # longer has all of the kernel sets it was expecting.
    if [ "${passnum}" == "0" ]; then
        indirect_blist=""
    fi

    # For convenience, merge the original and indirect blacklists.
    # NOTE: During pass 0, all_blist is equal to config_blist, since
    # indirect_blist is still empty.
    all_blist="${config_blist} ${indirect_blist}"

    # Disable support for indirect blacklisting by returning early during
    # pass 0. See issue #214 for details [1]. Basically, I realized that
    # indirect blacklisting is not needed in the use case that I envisioned
    # in the real-life example above. If a subconfiguration such as haswell
    # is defined to require the zen kernel set, it implies that the zen
    # kernels can be compiled with haswell compiler flags. That is, just
    # because the zen subconfig (and its compiler flags) is blacklisted
    # does not mean that the haswell subconfig cannot compile the zen
    # kernels with haswell-specific flags.
    #
    # [1] https://github.com/flame/blis/issues/214
    #
    # NOTE: Because of this early return, every pass-0 branch further down
    # is currently unreachable; it is kept so the feature can be re-enabled
    # by deleting this one block.
    if [ "${passnum}" == "0" ]; then
        return 0
    fi

    # 'read' (with the default IFS) strips leading and trailing whitespace
    # from each line. The '|| [ -n ... ]' clause makes sure a final line
    # that lacks a terminating newline is still processed.
    while read -r line || [ -n "${line}" ]; do

        # Remove everything after comment character '#'.
        curline=${line%%#*}

        # Leading whitespace and trailing comments are now gone. If the
        # line is empty, we can skip it altogether.
        if [ -z "${curline}" ]; then
            continue
        fi

        # Read the config name and config list for the current line.
        cname=${curline%%:*}
        list=${curline##*:}

        # If we encounter a slash, it means the name of the configuration
        # and the kernel set needed by that configuration are different.
        if [[ "${list}" == */* ]]; then

            klist=""
            clist=""

            # Intentionally unquoted: split the list into its
            # whitespace-separated items and handle each one on its own.
            for item in ${list}; do

                # The sub-configuration name is always the first sub-word in
                # the slash-separated compound word.
                config=${item%%/*}

                # Delete the sub-configuration name from the front of the
                # item, leaving the slash-separated kernel names (or just
                # the kernel name, if there is only one). An item without
                # any slash is left as-is, so it names both the config and
                # its kernel set.
                kernels=${item#*/}

                # Replace the slashes with spaces to transform the string
                # into a space-separated list of kernel names.
                kernels=${kernels//\// }

                clist="${clist} ${config}"
                klist="${klist} ${kernels}"
            done
        else
            clist=${list}
            klist=${list}
        fi

        # Strip out whitespace from the config name and config/kernel list
        # on each line.
        cname=$(canonicalize_ws "${cname}")
        clist=$(canonicalize_ws "${clist}")
        klist=$(canonicalize_ws "${klist}")

        # Next, we prepare to:
        # - pass 0: inspect klist for blacklisted configurations, which may
        #   reveal configurations as needing to be indirectly blacklisted.
        # - pass 1: compare cname to the blacklists and commit clist/klist
        #   to their respective registries, as appropriate.

        # Handle singleton and umbrella configuration entries separately.
        if [ "$(is_singleton_family "${cname}" "${clist}")" == "true" ]; then

            # Singleton configurations/families.
            # Note: for singleton families, clist contains one item, which
            # always equals cname, but klist could contain more than one
            # item.

            # Only consider updating the indirect blacklist (pass 0) or
            # committing clist to the registry (pass 1) if the
            # configuration name (cname) is not blacklisted.
            if [ "$(is_in_list "${cname}" "${all_blist}")" == "false" ]; then

                if [ "${passnum}" == "0" ]; then
                    # Even if the cname isn't blacklisted, one of the requisite
                    # kernels might be, so we need to check klist for blacklisted
                    # items. If we find one, we must assume that the entire entry
                    # must be thrown out. (Ideally, we would simply fall back to
                    # reference code for the blacklisted kernels, but that is not
                    # at all straightforward under the current configuration
                    # system architecture.) Thus, we add cname to the indirect
                    # blacklist.
                    for item in ${klist}; do
                        if [ "$(is_in_list "${item}" "${config_blist}")" == "true" ]; then
                            indirect_blist="${indirect_blist} ${cname}"
                            break
                        fi
                    done
                fi

                if [ "${passnum}" == "1" ]; then
                    # Store the clist to the cname key of the config registry.
                    assign_key_value "config_registry" "${cname}" "${clist}"
                fi
            fi

            # The kernel registry entry is stored even when cname itself is
            # blacklisted.
            if [ "${passnum}" == "1" ]; then
                # Store the klist to the cname key of the kernel registry.
                assign_key_value "kernel_registry" "${cname}" "${klist}"
            fi

        else
            # Umbrella configurations/families.

            # First we check cname, which should generally not be blacklisted
            # for umbrella families, but we check anyway just to be safe.
            if [ "$(is_in_list "${cname}" "${all_blist}")" == "false" ]; then

                if [ "${passnum}" == "1" ]; then

                    # Check each item in the clist. If any sub-config is
                    # blacklisted, we omit it from clist and klist. (The
                    # loop iterates over the words clist held when the loop
                    # started, so editing clist in the body is safe.)
                    for item in ${clist}; do
                        if [ "$(is_in_list "${item}" "${all_blist}")" == "true" ]; then
                            clist=$(remove_from_list "${item}" "${clist}")
                            klist=$(remove_from_list "${item}" "${klist}")
                        fi
                    done

                    # Store the config and kernel lists to entries that
                    # correspond to the config name.
                    assign_key_value "config_registry" "${cname}" "${clist}"
                    assign_key_value "kernel_registry" "${cname}" "${klist}"
                fi
            fi
        fi

    done < "${filename}"

    if [ "${passnum}" == "0" ]; then
        # Assign the final indirect blacklist (with whitespace removed).
        # The argument is quoted so the whole list is passed as one string,
        # like every other canonicalize_ws call in this function.
        indirect_blist="$(canonicalize_ws "${indirect_blist}")"
    fi
}
|
|
|
|
read_registry_file()
{
	# Populate the config and kernel registries from a config_registry file
	# and then flatten them.
	#
	# Arguments:
	#   $1 - path to the config_registry file.
	# Globals:
	#   Reads and writes the config_registry_* and kernel_registry_*
	#   variables through query_array() and assign_key_value(). Whatever
	#   pass_config_kernel_registries() touches is affected as well.
	# Outputs:
	#   Nothing on stdout.
	local filename
	local clist klist clisttmp klisttmp
	local iterate_again config
	local cr_var mem mems_mem newclist
	local kr_var ker kers_ker newklist

	filename="$1"

	# Execute an initial pass through the config_registry file so that
	# we can accumulate a list of indirectly blacklisted configurations,
	# if any.
	pass_config_kernel_registries "${filename}" "0"

	# Now that the indirect_blist has been created, make a second pass
	# through the 'config_registry' file, this time creating the actual
	# config and kernel registry data structures.
	pass_config_kernel_registries "${filename}" "1"

	# Now we must go back through the config_registry and substitute any
	# configuration families with their constituents' members. Each time
	# one of these substitutions occurs, we set a flag that causes us to
	# make one more pass. (Substituting a singleton definition does not
	# prompt additional iterations.) This process stops when a full pass
	# does not result in any substitution.
	iterate_again="1"
	while [ "${iterate_again}" == "1" ]; do

		iterate_again="0"

		for cr_var in ${!config_registry_*}; do

			config=${cr_var##config_registry_}

			clist=$(query_array "config_registry" "${config}")

			# The entries that define singleton families should never need
			# any substitution.
			if [ "$(is_singleton_family "${config}" "${clist}")" == "true" ]; then
				continue
			fi

			for mem in ${clist}; do

				mems_mem=$(query_array "config_registry" "${mem}")

				# If mems_mem is the empty string, then mem was not found
				# as a key in the config registry. In that case, we skip it
				# here; the error is reported later in the script.
				if [ "${mems_mem}" == "" ]; then
					continue
				fi

				if [ "${mem}" != "${mems_mem}" ]; then

					# Re-query the registry (rather than reusing clist) so
					# that substitutions made earlier in this inner loop are
					# not lost.
					clisttmp=$(query_array "config_registry" "${config}")

					# Replace the current config with its constituent config
					# set, canonicalize whitespace, and then remove duplicate
					# config set names, if they exist. Finally, update the
					# config registry with the new config list.
					# NOTE: We must use substitute_words() rather than a
					# simple sed expression because we need to avoid matching
					# partial strings. For example, if the list contains
					# "foo bar barsk" and we use sed to substitute "bee boo"
					# as the members of "bar", the result would (incorrectly)
					# be "foo bee boo bee boosk", which would then get
					# reduced, via rm_duplicate_words(), to
					# "foo bee boo boosk".
					newclist=$(substitute_words "${mem}" "${mems_mem}" "${clisttmp}")
					newclist=$(canonicalize_ws "${newclist}")
					newclist=$(rm_duplicate_words "${newclist}")

					assign_key_value "config_registry" "${config}" "${newclist}"

					# Since we performed a substitution and changed the
					# config list, mark the iteration flag to continue
					# another round, but only if the config (mem) value is
					# NOT present in the list of sub-configs. If it is
					# present, then further substitution may not necessarily
					# be needed this round.
					if [ "$(is_in_list "${mem}" "${mems_mem}")" == "false" ]; then
						iterate_again="1"
					fi
				fi
			done
		done
	done

	# Similar to what we just did for the config_registry, we now iterate
	# through the kernel_registry and substitute any configuration families
	# in the kernel list (right side of ':') with the members of that
	# family's kernel set. This process continues iteratively, as before,
	# until all families have been replaced with singleton configurations'
	# kernel sets.
	iterate_again="1"
	while [ "${iterate_again}" == "1" ]; do

		iterate_again="0"

		for kr_var in ${!kernel_registry_*}; do

			config=${kr_var##kernel_registry_}

			klist=$(query_array "kernel_registry" "${config}")

			# The entries that define singleton families should never need
			# any substitution. In the kernel registry, we know it's a
			# singleton entry when the cname occurs somewhere in the klist.
			# (This is slightly different than the same test in the config
			# registry, where we test that clist is one word and that
			# clist == cname.)
			if [ "$(is_in_list "${config}" "${klist}")" == "true" ]; then
				continue
			fi

			for ker in ${klist}; do

				kers_ker=$(query_array "kernel_registry" "${ker}")

				# If kers_ker is the empty string, then ker was not found as
				# a key in the kernel registry. While not common, this can
				# happen when ker identifies a kernel set that does not
				# correspond to any configuration. (Example: armv7a and
				# armv8a kernel sets are used by cortexa* configurations,
				# but do not correspond to their own configurations.)
				if [ "${kers_ker}" == "" ]; then
					continue
				fi

				# If the current config/kernel (ker) differs from its
				# singleton kernel entry (kers_ker), then that singleton
				# entry was specified to use a different configuration's
				# kernel set. Thus, we need to replace the occurrence in the
				# current config/kernel name with that of the kernel set it
				# needs.
				if [ "${ker}" != "${kers_ker}" ]; then

					# Re-query so earlier substitutions in this inner loop
					# are preserved.
					klisttmp=$(query_array "kernel_registry" "${config}")

					# Replace the current config with its requisite kernels,
					# canonicalize whitespace, and then remove duplicate
					# kernel set names, if they exist. Finally, update the
					# kernel registry with the new kernel list. (See the
					# note in the config loop for why substitute_words() is
					# used instead of sed.)
					newklist=$(substitute_words "${ker}" "${kers_ker}" "${klisttmp}")
					newklist=$(canonicalize_ws "${newklist}")
					newklist=$(rm_duplicate_words "${newklist}")

					assign_key_value "kernel_registry" "${config}" "${newklist}"

					# Since we performed a substitution and changed the
					# kernel list, mark the iteration flag to continue
					# another round, unless we just substituted using a
					# singleton family definition, in which case we don't
					# necessarily need to iterate further this round.
					if [ "$(is_in_list "${ker}" "${kers_ker}")" == "false" ]; then
						iterate_again="1"
					fi
				fi
			done
		done
	done
}
|
|
|
|
substitute_words()
{
	# Replace every whole-word occurrence of a word in a list.
	#
	# Arguments:
	#   $1 - the word to look for (compared against whole list items only).
	#   $2 - the replacement text (may itself contain several words).
	#   $3 - the whitespace-separated list to process.
	# Outputs:
	#   The new list on stdout. Each item is preceded by a single space,
	#   so the result carries a leading space; callers are expected to
	#   canonicalize whitespace afterwards.
	local word new_words list newlist str

	word="$1"
	new_words="$2"
	list="$3"
	newlist=""

	# ${list} is intentionally unquoted so that it splits into words.
	for str in ${list}; do

		if [ "${str}" == "${word}" ]; then
			newlist="${newlist} ${new_words}"
		else
			newlist="${newlist} ${str}"
		fi
	done

	echo "${newlist}"
}
|
|
|
|
build_kconfig_registry()
{
	# Build the kernel-to-config registry for a configuration family.
	#
	# Arguments:
	#   $1 - name of the configuration family to process.
	# Globals:
	#   Reads config_registry and kernel_registry via query_array();
	#   appends to kconfig_registry via assign_key_value().
	#
	# For every sub-configuration of the family, and for every kernel set
	# that sub-configuration requires, the sub-configuration name is
	# appended to the kconfig_registry entry keyed by that kernel set.
	local familyname subconfigs subconfig kernel_sets kernel_set

	familyname="$1"

	subconfigs=$(query_array "config_registry" "${familyname}")

	for subconfig in ${subconfigs}; do

		# Kernel sets needed by this sub-configuration.
		kernel_sets=$(query_array "kernel_registry" "${subconfig}")

		for kernel_set in ${kernel_sets}; do

			# Fetch the sub-configs already recorded for this kernel set,
			# append the current one, tidy the whitespace, and store the
			# result back in a single step.
			assign_key_value "kconfig_registry" "${kernel_set}" \
				"$(canonicalize_ws "$(query_array "kconfig_registry" "${kernel_set}") ${subconfig}")"
		done
	done
}
|
|
|
|
is_in_list()
{
	# Report whether a word appears as a whole item of a list.
	#
	# Arguments:
	#   $1 - the word to search for.
	#   $2 - the whitespace-separated list to search.
	# Outputs:
	#   "true" if some list item equals the word exactly, else "false".
	local needle haystack candidate

	needle="$1"
	haystack="$2"

	# ${haystack} is intentionally unquoted so that it splits into words.
	for candidate in ${haystack}; do

		if [ "${candidate}" == "${needle}" ]; then
			echo "true"
			return
		fi
	done

	echo "false"
}
|
|
|
|
is_singleton()
{
	# Report whether a list consists of exactly one word.
	#
	# Arguments:
	#   $1 - a whitespace-separated list.
	# Outputs:
	#   "true" if the list holds exactly one item, otherwise "false"
	#   (including for an empty list).
	local list

	list="$1"

	# Re-split the list into this function's positional parameters so
	# that the word count is available as $#.
	set -- ${list}

	if [ "$#" -eq 1 ]; then
		echo "true"
	else
		echo "false"
	fi
}
|
|
|
|
is_singleton_family()
{
	# Report whether a registry entry defines a singleton family, i.e. a
	# family whose member list is exactly one word and that word is the
	# family's own name.
	#
	# Arguments:
	#   $1 - the family (configuration) name.
	#   $2 - the family's whitespace-separated member list.
	# Outputs:
	#   "true" or "false" on stdout.
	local familyname memberlist rval

	familyname="$1"
	memberlist="$2"

	rval="false"

	# NOTE: is_singleton() prints "true"/"false", so its output must be
	# compared explicitly; testing it for non-emptiness would always
	# succeed.
	if [ "$(is_singleton "${memberlist}")" == "true" ]; then

		if [ "${memberlist}" == "${familyname}" ]; then
			rval="true"
		fi
	fi

	echo "${rval}"
}
|
|
|
|
remove_from_list()
{
	# Filter a list, dropping every item that matches a strike word.
	#
	# Arguments:
	#   $1 - whitespace-separated list of words to strike.
	#   $2 - whitespace-separated list to filter.
	# Outputs:
	#   The filtered list, whitespace-canonicalized, on stdout.
	local strike_words list flist item

	strike_words="$1"
	list="$2"
	flist=""

	for item in ${list}; do

		# Keep only the items that match none of the strike words.
		if [ "$(is_in_list "${item}" "${strike_words}")" == "false" ]; then
			flist="${flist} ${item}"
		fi
	done

	flist=$(canonicalize_ws "${flist}")

	# Return the filtered list.
	echo "${flist}"
}
|
|
|
|
canonicalize_ws()
{
	# Normalize the whitespace of a string: strip leading and trailing
	# whitespace and squeeze runs of spaces down to a single space.
	#
	# Arguments:
	#   $@ - the text to normalize. Normally a single quoted argument; if
	#        the caller passes an unquoted list, all of its words are
	#        used (joined with the first character of IFS) instead of
	#        silently keeping only the first word.
	# Outputs:
	#   The normalized string on stdout.
	local str

	str="$*"

	# Remove leading and trailing whitespace.
	str=$(echo -e "${str}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')

	# Remove duplicate spaces between words.
	str=$(echo -e "${str}" | tr -s " ")

	# Emit the result.
	echo "${str}"
}
|
|
|
|
rm_duplicate_words_simple()
{
	# Remove duplicate words from a string, keeping the first occurrence
	# of each word.
	#
	# Arguments:
	#   $1 - whitespace-separated list of words.
	# Outputs:
	#   The de-duplicated list on stdout. Each kept word is followed by
	#   awk's field separator, so a non-empty result ends with a space.
	local words dedup_prog unique

	words="$1"

	# Print a field (plus the field separator) only the first time it is
	# seen.
	dedup_prog='{for (i=1;i<=NF;i++) if (!a[$i]++) printf("%s%s",$i,FS)}{printf("\n")}'

	unique=$(echo "${words}" | awk "${dedup_prog}")

	echo "${unique}"
}
|
|
|
|
rm_duplicate_words()
{
	# Remove duplicate words from a string, keeping the LAST occurrence
	# of each word.
	#
	# Arguments:
	#   $1 - whitespace-separated list of words.
	# Outputs:
	#   The de-duplicated list on stdout, words separated by one space.
	#
	# The list is reversed, de-duplicated keeping first occurrences, and
	# reversed again. Keeping the last instance suits the duplicates that
	# show up in the config and kernel registries; for example, it keeps
	# only the final 'generic' in a family that lists it more than once.
	local words reverse_prog dedup_prog flipped flipped_unique unique

	words="$1"

	# awk program that prints the fields of a line in reverse order.
	reverse_prog='{ for (i=NF; i>1; i--) printf("%s ",$i); print $1; }'

	# awk program that prints each field only the first time it is seen.
	dedup_prog='{for (i=1;i<=NF;i++) if (!a[$i]++) printf("%s%s",$i,FS)}{printf("\n")}'

	flipped=$(echo "${words}" | awk "${reverse_prog}")
	flipped_unique=$(echo "${flipped}" | awk "${dedup_prog}")
	unique=$(echo "${flipped_unique}" | awk "${reverse_prog}")

	echo "${unique}"
}
|
|
|
|
get_cc_search_list()
{
	# Print the ordered list of C compilers to probe for.
	#
	# Globals:
	#   os_name (read) - name of the operating system.
	# Outputs:
	#   A space-separated list of compiler names, highest priority first.
	local list

	case "${os_name}" in
		OpenBSD|FreeBSD)
			# On OpenBSD and FreeBSD, prefer cc and clang over gcc.
			list="cc clang gcc"
			;;
		*)
			# On Linux, Darwin (OS X), and generic OSes, prefer gcc.
			list="gcc clang cc"
			;;
	esac

	echo "${list}"
}
|
|
|
|
get_cxx_search_list()
{
	# Print the ordered list of C++ compilers to probe for.
	#
	# Globals:
	#   os_name (read) - name of the operating system.
	# Outputs:
	#   A space-separated list of compiler names, highest priority first.
	local list

	case "${os_name}" in
		OpenBSD|FreeBSD)
			# On OpenBSD and FreeBSD, prefer c++ and clang++ over g++.
			list="c++ clang++ g++"
			;;
		*)
			# On Linux, Darwin (OS X), and generic OSes, prefer g++.
			list="g++ clang++ c++"
			;;
	esac

	echo "${list}"
}
|
|
|
|
select_tool()
{
	# Pick the first usable tool from a search list.
	#
	# Arguments:
	#   $1 - space-separated list of compilers/tools to try, in order.
	#   $2 - value of the environment variable associated with the tool
	#        type (e.g. CC, CXX, PYTHON); when non-empty it is tried
	#        before anything in $1.
	# Outputs:
	#   The name of the first candidate for which '<tool> --version'
	#   exits successfully, or an empty line if none does.
	local candidates env_choice candidate

	candidates=$1
	env_choice=$2

	# A user-provided choice takes precedence over the default list.
	if [ -n "${env_choice}" ]; then
		candidates="${env_choice} ${candidates}"
	fi

	for candidate in ${candidates}; do

		# A tool counts as present and working if it answers --version.
		if ${candidate} --version > /dev/null 2>&1; then
			echo "${candidate}"
			return
		fi
	done

	# Nothing in the list worked.
	echo ""
}
|
|
|
|
auto_detect()
{
	# Build and run a small program, compiled from sources inside the
	# framework, that prints the name of the sub-configuration matching
	# the host CPU.
	#
	# Globals (read):
	#   found_cc, dist_path, is_win, cc_vendor, enable_system, LIBPTHREAD,
	#   rename_blis_arch_type, rename_blis_model_type, debug_auto_detect.
	# Outputs:
	#   Normally, whatever the auto-detect executable prints. Unless
	#   debug_auto_detect is "no", the compile command is printed on a
	#   single line instead and nothing is compiled or run.
	#
	# All scratch variables are local so that calling this function in
	# the current shell cannot clobber a caller's variables.
	local cc cflags config_defines detected_config cmd
	local c_src_pairs c_src_filepaths c_hdr_pairs c_hdr_paths
	local pair filename rootdir filepath path
	local autodetect_x bli_cpuid_c_filepath ldflags
	local double_quote_open double_quote_close

	# Use the same compiler that was found earlier.
	cc="${found_cc}"

	# Tweak the flags we use based on the compiler. This is mostly just
	# an opportunity to turn off annoying warnings that some compilers
	# may throw off.
	if [ "${cc}" == "clang" ]; then
		cflags="-Wno-tautological-compare"
	else
		cflags=
	fi

	# Accumulate a list of source files we'll need to compile along with
	# the top-level (root) directory in which they are located.
	c_src_pairs=""
	c_src_pairs="${c_src_pairs} frame:bli_arch.c"
	c_src_pairs="${c_src_pairs} frame:bli_cpuid.c"
	c_src_pairs="${c_src_pairs} frame:bli_env.c"
	c_src_pairs="${c_src_pairs} build:config_detect.c"

	# Accumulate a list of full filepaths to the source files listed above.
	c_src_filepaths=""
	for pair in ${c_src_pairs}; do

		# Each pair has the form <rootdir>:<filename>.
		filename=${pair#*:}
		rootdir=${pair%:*}

		filepath=$(find "${dist_path}/${rootdir}" -name "${filename}")
		c_src_filepaths="${c_src_filepaths} ${filepath}"
	done

	# Accumulate a list of header files we'll need to locate along with
	# the top-level (root) directory in which they are located.
	c_hdr_pairs=""
	c_hdr_pairs="${c_hdr_pairs} frame:bli_system.h"
	c_hdr_pairs="${c_hdr_pairs} frame:bli_type_defs.h"
	c_hdr_pairs="${c_hdr_pairs} frame:bli_arch.h"
	c_hdr_pairs="${c_hdr_pairs} frame:bli_cpuid.h"
	c_hdr_pairs="${c_hdr_pairs} frame:bli_env.h"
	# NOTE: These headers are needed by bli_type_defs.h.
	c_hdr_pairs="${c_hdr_pairs} frame:bli_malloc.h"
	c_hdr_pairs="${c_hdr_pairs} frame:bli_pthread.h"

	# Accumulate a list of full paths to the header files listed above.
	# While we are at it, we include the "-I" compiler option to indicate
	# adding the path to the list of directories to search when encountering
	# #include directives.
	c_hdr_paths=""
	for pair in ${c_hdr_pairs}; do

		filename=${pair#*:}
		rootdir=${pair%:*}

		filepath=$(find "${dist_path}/${rootdir}" -name "${filename}")
		path=${filepath%/*}
		c_hdr_paths="${c_hdr_paths} -I${path}"
	done

	# Define the executable name.
	autodetect_x="auto-detect.x"

	# Create #defines for all of the BLIS_CONFIG_ macros in bli_cpuid.c.
	bli_cpuid_c_filepath=$(find "${dist_path}/frame" -name "bli_cpuid.c")
	config_defines=$(grep BLIS_CONFIG_ ${bli_cpuid_c_filepath} \
	                 | sed -e 's/#ifdef /-D/g')

	# Set the linker flags. We typically need pthreads (or BLIS's homerolled
	# equivalent) because it is needed for parts of bli_arch.c unrelated to
	# bli_arch_string(). Start from an empty value so that nothing is
	# inherited when neither branch below assigns it.
	ldflags=""
	if [[ "$is_win" == "no" || "$cc_vendor" != "clang" ]]; then
		ldflags="${LIBPTHREAD--lpthread}"
	fi

	# However, if --disable-system was given, we override the choice made above
	# and do not use any pthread link flags.
	if [[ "$enable_system" == "no" ]]; then
		ldflags=
	fi

	# Compile the auto-detect program using source code inside the
	# framework.
	# NOTE: -D_GNU_SOURCE is needed to enable POSIX extensions to
	# pthreads (i.e., barriers).

	# These two strings wrap a macro value in escaped double quotes that
	# survive the 'eval' below.
	double_quote_open=\"\\\"
	double_quote_close=\\\"\"
	cmd="${cc} ${config_defines} \
	     -DBLIS_CONFIGURETIME_CPUID \
	     -D__blis_arch_type_name=${double_quote_open}${rename_blis_arch_type}${double_quote_close} \
	     -D__blis_model_type_name=${double_quote_open}${rename_blis_model_type}${double_quote_close} \
	     ${c_hdr_paths} \
	     -std=c99 -D_GNU_SOURCE \
	     ${cflags} \
	     ${c_src_filepaths} \
	     ${ldflags} \
	     -o ${autodetect_x}"

	if [ "${debug_auto_detect}" == "no" ]; then

		# Execute the compilation command.
		eval ${cmd}

	else

		# Debugging stuff. Instead of executing ${cmd}, join the lines together
		# with tr and trim excess whitespace via awk.
		cmd=$(echo "${cmd}" | tr '\n' ' ' | awk '{$1=$1;print}')
		echo "${cmd}"
		return
	fi

	# Run the auto-detect program.
	detected_config=$(./${autodetect_x})

	# Remove the executable file.
	rm -f ./${autodetect_x}

	# Return the detected sub-configuration name.
	echo "${detected_config}"
}
|
|
|
|
has_libmemkind()
{
	# Probe whether a program can be compiled and linked against
	# libmemkind with the selected compiler.
	#
	# Globals (read): dist_path, found_cc, LDFLAGS.
	# Outputs:
	#   'yes' if the test program builds, 'no' otherwise.
	local probe_src probe_src_path link_flags probe_bin verdict

	# Locate the libmemkind detection source file.
	probe_src="libmemkind_detect.c"
	probe_src_path=$(find ${dist_path}/build -name "${probe_src}")

	# Link against libmemkind in addition to the user's LDFLAGS.
	link_flags="${LDFLAGS} -lmemkind"

	# Name of the throwaway executable.
	probe_bin="libmemkind-detect.x"

	# Try to build the test program; the exit status of the compiler
	# determines the answer.
	if ${found_cc} -o ${probe_bin} ${probe_src_path} ${link_flags} 2> /dev/null; then
		verdict='yes'
	else
		verdict='no'
	fi

	# Remove the executable generated above.
	rm -f ./${probe_bin}

	echo "${verdict}"
}
|
|
|
|
has_pragma_omp_simd()
{
	# Probe whether the selected compiler can build a program that uses
	# '#pragma omp simd' (compiled with -fopenmp-simd).
	#
	# Globals (read): dist_path, found_cc.
	# Outputs:
	#   'yes' if the test program builds, 'no' otherwise.
	local probe_src probe_src_path probe_bin verdict

	# Locate the omp-simd detection source file.
	probe_src="omp_simd_detect.c"
	probe_src_path=$(find ${dist_path}/build -name "${probe_src}")

	# Name of the throwaway executable.
	probe_bin="omp_simd-detect.x"

	# Try to build the test program; the exit status of the compiler
	# determines the answer.
	if ${found_cc} -std=c99 -O3 -march=native -fopenmp-simd \
	               -o ${probe_bin} ${probe_src_path} 2> /dev/null; then
		verdict='yes'
	else
		verdict='no'
	fi

	# Remove the executable generated above.
	rm -f ./${probe_bin}

	echo "${verdict}"
}
|
|
|
|
echoerr()
{
	# Print an error message prefixed with the script name.
	#
	# Arguments:
	#   $@ - the message; all arguments are joined into one string.
	# Globals (read): script_name.
	# Outputs:
	#   "<script_name>: error: <message>" on stdout.
	#
	# The script name is passed as a printf argument rather than being
	# embedded in the format string, so any '%' or '\' characters in it
	# are printed literally.
	printf '%s: error: %s\n' "${script_name}" "$*" #>&2;
}
|
|
|
|
echowarn()
{
	# Print a warning message prefixed with the script name.
	#
	# Arguments:
	#   $@ - the message; all arguments are joined into one string.
	# Globals (read): script_name.
	# Outputs:
	#   "<script_name>: warning: <message>" on stdout.
	#
	# The script name is passed as a printf argument rather than being
	# embedded in the format string, so any '%' or '\' characters in it
	# are printed literally.
	printf '%s: warning: %s\n' "${script_name}" "$*" #>&2;
}
|
|
|
|
blacklistcc_add()
{
	# Blacklist a sub-configuration because the compiler cannot build it.
	#
	# Arguments:
	#   $1 - name of the sub-configuration to blacklist.
	# Globals:
	#   config_blist (read, appended to); cc_vendor, cc_version (read).
	# Outputs:
	#   A warning, but only the first time a given name is added.
	local subconfig

	subconfig="$1"

	# Nothing to do (and nothing to print) unless the name is reported as
	# absent from the blacklist.
	if [ "$(is_in_list "${subconfig}" "${config_blist}")" != "false" ]; then
		return 0
	fi

	echowarn "${cc_vendor} ${cc_version} does not support '${subconfig}'; adding to blacklist."
	config_blist="${config_blist} ${subconfig}"
}
|
|
|
|
blacklistbu_add()
{
	# Blacklist a sub-configuration because the assembler cannot handle
	# it.
	#
	# Arguments:
	#   $1 - name of the sub-configuration to blacklist.
	# Globals:
	#   config_blist (read, appended to); bu_version (read).
	# Outputs:
	#   A warning, but only the first time a given name is added.
	local subconfig

	subconfig="$1"

	# Nothing to do (and nothing to print) unless the name is reported as
	# absent from the blacklist.
	if [ "$(is_in_list "${subconfig}" "${config_blist}")" != "false" ]; then
		return 0
	fi

	echowarn "assembler ('as' ${bu_version}) does not support '${subconfig}'; adding to blacklist."
	config_blist="${config_blist} ${subconfig}"
}
|
|
|
|
blacklist_init()
{
	# Reset the global configuration blacklist (config_blist) to empty.
	config_blist=
}
|
|
|
|
blacklist_cleanup()
{
	# Tidy the global configuration blacklist (config_blist): drop
	# duplicate names first, then normalize the whitespace of the result.
	config_blist=$(canonicalize_ws "$(rm_duplicate_words "${config_blist}")")
}
|
|
|
|
echoerr_unsupportedcc()
{
	# Report that the detected compiler version is unsupported and abort
	# the script with exit status 1.
	#
	# Globals (read): script_name, cc_vendor, cc_version.
	local msg

	msg="${script_name}: *** Unsupported compiler version: ${cc_vendor} ${cc_version}."

	echoerr "${msg}"
	exit 1
}
|
|
|
|
echoerr_unsupportedpython()
{
	# Report that the detected python version is unsupported and abort
	# the script with exit status 1.
	#
	# Globals (read): script_name, python_version.
	local msg

	msg="${script_name}: *** Unsupported python version: ${python_version}."

	echoerr "${msg}"
	exit 1
}
|
|
|
|
get_binutils_version()
{
	# Determine the version of the assembler and publish it in globals.
	#
	# Globals:
	#   AS (read)       - assembler to query; defaults to 'as'.
	#   script_name (read)
	#   binutil, bu_string, bu_version, bu_major, bu_minor, bu_revision
	#                   - (written) the tool name, its raw version
	#                     output, and the parsed version components.
	# Outputs:
	#   A one-line summary of the version that was found.
	binutil=${AS:-as}

	# Query the full binutils version string output. This includes the
	# version string along with (potentially) a bunch of other textual
	# clutter.
	if [ "$(uname -s)" == "Darwin" ]; then
		# The default OS X assembler responds only to '-v', hangs
		# indefinitely if not given an input argument, writes its answer
		# to stderr, and creates an 'a.out' by default; hence the
		# /dev/null input, the explicit output file, and the 2>&1.
		bu_string=$(${binutil} -v /dev/null -o /dev/null 2>&1)
	else
		bu_string=$(${binutil} --version 2>/dev/null)
	fi

	# Query the binutils version number.
	# 'grep -E' is used instead of the obsolescent 'egrep' wrapper. The
	# last part ({ read first rest ; echo $first ; }) keeps only the first
	# match.
	bu_version=$(echo "${bu_string}" | grep -E -o '[0-9]+\.[0-9]+\.?[0-9]*' | { read first rest ; echo ${first} ; })

	# Parse the version number into its major, minor, and revision
	# components.
	bu_major=$(echo "${bu_version}" | cut -d. -f1)
	bu_minor=$(echo "${bu_version}" | cut -d. -f2)
	bu_revision=$(echo "${bu_version}" | cut -d. -f3)

	echo "${script_name}: found assembler ('as') version ${bu_version} (maj: ${bu_major}, min: ${bu_minor}, rev: ${bu_revision})."
}
|
|
|
|
get_python_search_list()
{
	# Print the ordered list of python interpreters to probe for.
	#
	# Outputs:
	#   A space-separated list of interpreter names; plain 'python' is
	#   tried first on all OSes.
	echo "python python3 python2"
}
|
|
|
|
get_python_version()
{
	# Determine the version of the selected python interpreter and
	# publish it in globals.
	#
	# Globals:
	#   found_python, script_name (read).
	#   python_version, python_major, python_minor, python_revision
	#   (written).
	# Outputs:
	#   A one-line summary of the version that was found.
	local py_cmd banner

	py_cmd="${found_python}"

	# Ask the interpreter for its version banner (e.g. "Python X.Y.Z").
	# NOTE: Python seems to echo its version info to stderr, not stdout,
	# so stderr is merged into stdout before capturing.
	banner="$(${py_cmd} --version 2>&1)"

	# Strip the leading text, keeping the first number and everything
	# after it.
	python_version=$(echo "${banner}" | sed -e "s/[a-zA-Z_ ]* \([0-9]*\..*\)/\1/g")

	# Split the version into its dot-separated components.
	python_major=$(echo "${python_version}" | cut -d. -f1)
	python_minor=$(echo "${python_version}" | cut -d. -f2)
	python_revision=$(echo "${python_version}" | cut -d. -f3)

	echo "${script_name}: found python version ${python_version} (maj: ${python_major}, min: ${python_minor}, rev: ${python_revision})."
}
|
|
|
|
check_python()
{
	# Verify that the detected python version is one we support; abort
	# via echoerr_unsupportedpython() if it is not.
	#
	# Globals (read): python_major, python_minor, python_version,
	#                 script_name.
	#
	# Python requirements
	#
	#   python1: no versions supported
	#   python2: 2.7+
	#   python3: 3.4+
	#
	# NOTE: It's actually unclear whether python 3.0 through 3.3.x would work.
	# Python 3.5 is the oldest python3 that I have available to test with, and
	# I only know that 3.4 will work thanks to feedback from Dave Love. So it's
	# quite possible that some of those "unsupported" python3 versions are
	# sufficient. -FGVZ
	#

	# Reject python 1.x outright, python 2.6.x or older, and python 3.3.x
	# or older.
	if [ ${python_major} -eq 1 ] ||
	   { [ ${python_major} -eq 2 ] && [ ${python_minor} -lt 7 ]; } ||
	   { [ ${python_major} -eq 3 ] && [ ${python_minor} -lt 4 ]; }; then
		echoerr_unsupportedpython
	fi

	echo "${script_name}: python ${python_version} appears to be supported."
}
|
|
|
|
get_compiler_version()
{
	# Determine the vendor and version of the selected C compiler and
	# publish them in globals.
	#
	# Globals:
	#   found_cc, script_name (read).
	#   cc_vendor, cc_version, cc_major, cc_minor, cc_revision (written).
	# Outputs:
	#   A one-line summary of the vendor and version that were found.
	#
	# NOTE(review): vendor_string is local to this function, yet
	# check_compiler() reads ${vendor_string} for its Apple clang test;
	# confirm whether that value is expected to be visible there.
	local cc vendor_string

	cc="${found_cc}"

	# Query the full vendor version string output. This includes the
	# version number along with (potentially) a bunch of other textual
	# clutter.
	# NOTE: This maybe should use merged stdout/stderr rather than only
	# stdout. But it works for now.
	vendor_string="$(${cc} --version 2>/dev/null)"

	# Query the compiler "vendor" (ie: the compiler's simple name) and
	# isolate the version number.
	# 'grep -E' is used instead of the obsolescent 'egrep' wrapper. The
	# last part ({ read first rest ; echo $first ; }) keeps only the first
	# match.
	cc_vendor=$(echo "${vendor_string}" | grep -E -o 'icc|gcc|clang|emcc|pnacl|IBM|oneAPI|crosstool-NG' | { read first rest ; echo $first ; })
	if [ "${cc_vendor}" = "crosstool-NG" ]; then
		# Treat compilers built by crosstool-NG (for eg: conda) as gcc.
		cc_vendor="gcc"
	fi
	if [ "${cc_vendor}" = "icc" ] || [ "${cc_vendor}" = "gcc" ]; then
		cc_version=$(${cc} -dumpversion)
	# If compiler is AOCC, first grep for clang and then the version number.
	elif [ "${cc_vendor}" = "clang" ]; then
		cc_version=$(echo "${vendor_string}" | grep -E -o '(clang|LLVM) version [0-9]+\.[0-9]+\.?[0-9]*' | grep -E -o '[0-9]+\.[0-9]+\.?[0-9]*')
	elif [ "${cc_vendor}" = "oneAPI" ]; then
		# Treat Intel oneAPI's clang as clang, not icc.
		cc_vendor="clang"
		cc_version=$(echo "${vendor_string}" | grep -E -o '[0-9]+\.[0-9]+\.[0-9]+\.?[0-9]*' | { read first rest ; echo ${first} ; })
	else
		cc_version=$(echo "${vendor_string}" | grep -E -o '[0-9]+\.[0-9]+\.?[0-9]*' | { read first rest ; echo ${first} ; })
	fi

	# Parse the version number into its major, minor, and revision
	# components.
	cc_major=$(echo "${cc_version}" | cut -d. -f1)
	cc_minor=$(echo "${cc_version}" | cut -d. -f2)
	cc_revision=$(echo "${cc_version}" | cut -d. -f3)

	# gcc 7 introduced new behavior to -dumpversion whereby only the major
	# version component is output. However, as part of this change, gcc 7
	# also introduced a new option, -dumpfullversion, which is guaranteed to
	# always output the major, minor, and revision numbers. Thus, if we're
	# using gcc and its version is 7 or later, we re-query and re-parse the
	# version string. The vendor is tested first so that the numeric
	# comparison is only evaluated for gcc.
	if [ "${cc_vendor}" = "gcc" ] && [ "${cc_major}" -ge 7 ]; then

		# Re-query the version number using -dumpfullversion.
		cc_version=$(${cc} -dumpfullversion)

		# And parse the result.
		cc_major=$(echo "${cc_version}" | cut -d. -f1)
		cc_minor=$(echo "${cc_version}" | cut -d. -f2)
		cc_revision=$(echo "${cc_version}" | cut -d. -f3)
	fi

	echo "${script_name}: found ${cc_vendor} version ${cc_version} (maj: ${cc_major}, min: ${cc_minor}, rev: ${cc_revision})."
}
|
|
|
|
check_compiler()
|
|
{
|
|
local cc
|
|
|
|
cc="${found_cc}"
|
|
|
|
#
|
|
# Compiler requirements
|
|
#
|
|
# General:
|
|
#
|
|
# icc 15+, gcc 4.7+, clang 3.3+
|
|
#
|
|
# Specific:
|
|
#
|
|
# skx: icc 15.0.1+, gcc 6.0+, clang 3.9+
|
|
# knl: icc 14.0.1+, gcc 5.0-14, clang 3.9+
|
|
# haswell: any
|
|
# sandybridge: any
|
|
# penryn: any
|
|
#
|
|
# zen: gcc 6.0+[1], clang 4.0+
|
|
# excavator: gcc 4.9+, clang 3.5+
|
|
# steamroller: any
|
|
# piledriver: any
|
|
# bulldozer: any
|
|
#
|
|
# cortexa57: any
|
|
# cortexa15: any
|
|
# cortexa9: any
|
|
#
|
|
# armsve: clang11+, gcc10+
|
|
#
|
|
# generic: any
|
|
#
|
|
# Note: These compiler requirements were originally modeled after similar
|
|
# requirements encoded into TBLIS's configure.ac [2].
|
|
#
|
|
# [1] While gcc 6.0 or newer is needed for zen support (-march=znver1),
|
|
# we relax this compiler version constraint a bit by targeting bdver4
|
|
# and then disabling the instruction sets that were removed in the
|
|
# transition from bdver4 to znver1. (See config/zen/make_defs.mk for
|
|
# the specific compiler flags used.)
|
|
# [2] https://github.com/devinamatthews/tblis/
|
|
#
|
|
|
|
echo "${script_name}: checking for blacklisted configurations due to ${cc} ${cc_version}."
|
|
|
|
# Fixme: check on a64fx, neoverse, and others
|
|
|
|
# gcc
|
|
if [ "x${cc_vendor}" = "xgcc" ]; then
|
|
|
|
if [ ${cc_major} -lt 4 ]; then
|
|
echoerr_unsupportedcc
|
|
fi
|
|
if [ ${cc_major} -eq 4 ]; then
|
|
blacklistcc_add "knl"
|
|
if [ ${cc_minor} -lt 7 ]; then
|
|
echoerr_unsupportedcc
|
|
fi
|
|
if [ ${cc_minor} -lt 9 ]; then
|
|
blacklistcc_add "excavator"
|
|
blacklistcc_add "zen"
|
|
fi
|
|
fi
|
|
if [[ ${cc_major} -lt 5 ]] || [[ ${cc_major} -gt 14 ]]; then
|
|
blacklistcc_add "knl"
|
|
fi
|
|
if [ ${cc_major} -lt 6 ]; then
|
|
# Normally, zen would be blacklisted for gcc prior to 6.0.
|
|
# However, we have a workaround in place in the zen
|
|
# configuration's make_defs.mk file that starts with bdver4
|
|
# and disables the instructions that were removed in znver1.
|
|
# Thus, this "blacklistcc_add" statement has been moved above.
|
|
#blacklistcc_add "zen"
|
|
blacklistcc_add "skx"
|
|
# gcc 5.x may support POWER9 but it is unverified.
|
|
blacklistcc_add "power9"
|
|
fi
|
|
if [ ${cc_major} -lt 10 ]; then
|
|
blacklistcc_add "armsve"
|
|
fi
|
|
fi
|
|
|
|
# icc
|
|
if [ "x${cc_vendor}" = "xicc" ]; then
|
|
|
|
if [ ${cc_major} -lt 15 ]; then
|
|
echoerr_unsupportedcc
|
|
fi
|
|
if [ ${cc_major} -eq 15 ]; then
|
|
if [ ${cc_revision} -lt 1 ]; then
|
|
blacklistcc_add "skx"
|
|
fi
|
|
fi
|
|
if [ ${cc_major} -eq 18 ]; then
|
|
echo "${script_name}: ${cc} ${cc_version} is known to cause erroneous results. See https://github.com/flame/blis/issues/371 for details."
|
|
blacklistcc_add "knl"
|
|
blacklistcc_add "skx"
|
|
fi
|
|
if [ ${cc_major} -ge 19 ]; then
|
|
echo "${script_name}: ${cc} ${cc_version} is known to cause erroneous results. See https://github.com/flame/blis/issues/371 for details."
|
|
echoerr_unsupportedcc
|
|
fi
|
|
fi
|
|
|
|
# clang
|
|
if [ "x${cc_vendor}" = "xclang" ]; then
|
|
if [ "$(echo ${vendor_string} | grep -o Apple)" = "Apple" ]; then
|
|
if [ ${cc_major} -lt 5 ]; then
|
|
echoerr_unsupportedcc
|
|
fi
|
|
# See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions
|
|
if [ ${cc_major} -eq 5 ]; then
|
|
# Apple clang 5.0 is clang 3.4svn
|
|
blacklistcc_add "excavator"
|
|
blacklistcc_add "zen"
|
|
fi
|
|
if [ ${cc_major} -lt 7 ]; then
|
|
blacklistcc_add "knl"
|
|
blacklistcc_add "skx"
|
|
fi
|
|
else
|
|
if [ ${cc_major} -lt 3 ]; then
|
|
echoerr_unsupportedcc
|
|
fi
|
|
if [ ${cc_major} -eq 3 ]; then
|
|
if [ ${cc_minor} -lt 3 ]; then
|
|
echoerr_unsupportedcc
|
|
fi
|
|
if [ ${cc_minor} -lt 5 ]; then
|
|
blacklistcc_add "excavator"
|
|
blacklistcc_add "zen"
|
|
fi
|
|
if [ ${cc_minor} -lt 9 ]; then
|
|
blacklistcc_add "knl"
|
|
blacklistcc_add "skx"
|
|
fi
|
|
fi
|
|
if [ ${cc_major} -lt 4 ]; then
|
|
# See comment above regarding zen support.
|
|
#blacklistcc_add "zen"
|
|
: # explicit no-op since bash can't handle empty loop bodies.
|
|
fi
|
|
if [ ${cc_major} -lt 11 ]; then
|
|
blacklistcc_add "armsve"
|
|
fi
|
|
fi
|
|
fi
|
|
}
|
|
|
|
check_compiler_version_ranges()
{
	#
	# Classify the detected compiler version against a handful of version
	# ranges and record the result in global 'yes'/'no' flags.
	#
	# Globals read:    found_cc, script_name, cc_vendor, cc_version,
	#                  cc_major, cc_minor
	# Globals written: gcc_older_than_4_9_0, gcc_older_than_6_1_0,
	#                  gcc_older_than_9_1_0, gcc_older_than_11_2_0
	# Outputs:         progress/notes on stdout.
	#
	local cc

	cc="${found_cc}"

	#
	# We check for various compiler version ranges that may cause us
	# issues in properly supporting those compiler versions within the
	# BLIS build system.
	#
	# range:    gcc < 4.9.0 (ie: 4.8.5 or older)
	# variable: gcc_older_than_4_9_0
	# comments:
	# These older versions of gcc may support microarchitectures such as
	# sandybridge, but the '-march=' flag uses a different label syntax.
	# In newer versions, '-march=sandybridge' is the preferred syntax [1].
	# However, in older versions, the syntax for the same compiler option
	# is '-march=corei7-avx' [2].
	#
	# [1] https://gcc.gnu.org/onlinedocs/gcc-4.9.0/gcc/i386-and-x86-64-Options.html#i386-and-x86-64-Options
	# [2] https://gcc.gnu.org/onlinedocs/gcc-4.8.5/gcc/i386-and-x86-64-Options.html#i386-and-x86-64-Options
	#
	# range:    gcc < 6.1 (ie: 5.5 or older)
	# variable: gcc_older_than_6_1_0
	# comments:
	# These older versions of gcc do not explicitly support the Zen (Zen1)
	# microarchitecture; the newest microarchitectural value understood by
	# these versions is '-march=bdver4' [3]. However, basic support for these
	# older versions can be attained in a roundabout way by starting with the
	# instruction sets enabled by '-march=bdver4' and then disabling the
	# instruction sets that were removed in the transition from Excavator to
	# Zen, namely: FMA4, TBM, XOP, and LWP. Newer versions of gcc support Zen
	# via the '-march=znver1' option [4].
	#
	# [3] https://gcc.gnu.org/onlinedocs/gcc-5.5.0/gcc/x86-Options.html#x86-Options
	# [4] https://gcc.gnu.org/onlinedocs/gcc-6.1.0/gcc/x86-Options.html#x86-Options
	#
	# range:    gcc < 9.1 (ie: 8.3 or older)
	# variable: gcc_older_than_9_1_0
	# comments:
	# These older versions of gcc do not explicitly support the Zen2
	# microarchitecture; the newest microarchitectural value understood by
	# these versions is either '-march=znver1' (if !gcc_older_than_6_1_0) [5]
	# or '-march=bdver4' (if gcc_older_than_6_1_0) [3]. If gcc is 6.1 or
	# newer, '-march=znver1' may be used (since the instruction sets it
	# enables are a subset of those enabled by '-march=znver2'); otherwise,
	# '-march=bdver4' must be used in conjunction with disabling the
	# instruction sets that were removed in the transition from Excavator to
	# Zen, as described in the section above for gcc_older_than_6_1_0.
	# Newer versions of gcc support Zen2 via the '-march=znver2' option [6].
	#
	# [5] https://gcc.gnu.org/onlinedocs/gcc-8.3.0/gcc/x86-Options.html#x86-Options
	# [6] https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/x86-Options.html#x86-Options
	#
	# range:    gcc < 11.2 (ie: 11.1 or older)
	# variable: gcc_older_than_11_2_0
	# comments:
	# NOTE(review): per the change description that introduced this range,
	# gcc older than 11.2 cannot compile the BF16/INT8 intrinsics needed by
	# the 'aocl_gemm' addon, so the addon is meant to be disabled for such
	# compilers. The code that acts on this flag is not in this function;
	# confirm against its consumer.
	#

	gcc_older_than_4_9_0='no'
	gcc_older_than_6_1_0='no'
	gcc_older_than_9_1_0='no'
	gcc_older_than_11_2_0='no'

	echo "${script_name}: checking ${cc} ${cc_version} against known consequential version ranges."

	# gcc
	if [ "x${cc_vendor}" = "xgcc" ]; then

		# Check for gcc < 4.9.0 (ie: 4.8.5 or older). Only the 4.x series
		# is examined here.
		if [ "${cc_major}" -eq 4 ] && [ "${cc_minor}" -lt 9 ]; then
			echo "${script_name}: note: found ${cc} version older than 4.9.0."
			gcc_older_than_4_9_0='yes'
		fi

		# Check for gcc < 6.1.0 (ie: 5.5 or older).
		if [ "${cc_major}" -lt 6 ]; then
			echo "${script_name}: note: found ${cc} version older than 6.1."
			gcc_older_than_6_1_0='yes'
		fi

		# Check for gcc < 9.1.0 (ie: 8.3 or older).
		if [ "${cc_major}" -lt 9 ]; then
			echo "${script_name}: note: found ${cc} version older than 9.1."
			gcc_older_than_9_1_0='yes'
		fi

		# Check for gcc < 11.2.0 (ie: 11.1 or older): either any major
		# version below 11, or 11.x with a minor version below 2.
		if [ "${cc_major}" -lt 11 ] ||
		   { [ "${cc_major}" -eq 11 ] && [ "${cc_minor}" -lt 2 ]; }; then
			echo "${script_name}: note: found ${cc} version older than 11.2.0."
			gcc_older_than_11_2_0='yes'
		fi
	fi

	# icc
	if [ "x${cc_vendor}" = "xicc" ]; then
		: # No icc version ranges are tracked yet.
	fi

	# clang
	if [ "x${cc_vendor}" = "xclang" ]; then
		: # No clang version ranges are tracked yet.
	fi
}
|
|
|
|
check_assembler()
{
	#
	# Probe whether the toolchain can assemble small instruction-set test
	# files and blacklist the sub-configurations whose probe fails.
	#
	# Globals read:    found_cc, dist_path, script_name, bu_version,
	#                  cc_vendor
	# Globals written: knows_fma4, knows_avx, knows_fma3, knows_avx512f,
	#                  knows_avx512dq (each holds the string returned by
	#                  try_assemble)
	# Calls:           try_assemble, blacklistbu_add
	# Outputs:         one progress line on stdout.
	#
	local cc asm_dir cflags asm_fp

	cc="${found_cc}"

	# The directory where the assembly files will be.
	asm_dir="${dist_path}/build"

	# Most of the time, we won't need any additional compiler flags.
	cflags=""

	echo "${script_name}: checking for blacklisted configurations due to as ${bu_version}."

	#
	# Check support for FMA4 (amd: bulldozer).
	#
	# NOTE: asm_dir is quoted so that a source tree located under a path
	# containing whitespace is still searched correctly.
	asm_fp=$(find "${asm_dir}" -name "fma4.s")
	knows_fma4=$(try_assemble "${cc}" "${cflags}" "${asm_fp}")

	if [ "x${knows_fma4}" = "xno" ]; then
		blacklistbu_add "bulldozer"
	fi

	#
	# Check support for AVX (intel: sandybridge+, amd: piledriver+).
	#
	asm_fp=$(find "${asm_dir}" -name "avx.s")
	knows_avx=$(try_assemble "${cc}" "${cflags}" "${asm_fp}")

	if [ "x${knows_avx}" = "xno" ]; then
		blacklistbu_add "sandybridge"
	fi

	#
	# Check support for FMA3 (intel: haswell+, amd: piledriver+).
	#
	asm_fp=$(find "${asm_dir}" -name "fma3.s")
	knows_fma3=$(try_assemble "${cc}" "${cflags}" "${asm_fp}")

	if [ "x${knows_fma3}" = "xno" ]; then
		blacklistbu_add "haswell"
		blacklistbu_add "piledriver"
		blacklistbu_add "steamroller"
		blacklistbu_add "excavator"
		blacklistbu_add "skx"
	fi

	#
	# Check support for AVX-512f (knl, skx).
	#

	# The assembler on OS X won't recognize AVX-512 without help.
	if [ "${cc_vendor}" = "clang" ]; then
		cflags="-march=knl"
	fi

	asm_fp=$(find "${asm_dir}" -name "avx512f.s")
	knows_avx512f=$(try_assemble "${cc}" "${cflags}" "${asm_fp}")

	if [ "x${knows_avx512f}" = "xno" ]; then
		blacklistbu_add "knl"
		blacklistbu_add "skx"
	fi

	#
	# Check support for AVX-512dq (skx).
	#

	# The assembler on OS X won't recognize AVX-512 without help.
	if [ "${cc_vendor}" = "clang" ]; then
		cflags="-march=skylake-avx512"
	fi

	asm_fp=$(find "${asm_dir}" -name "avx512dq.s")
	knows_avx512dq=$(try_assemble "${cc}" "${cflags}" "${asm_fp}")

	if [ "x${knows_avx512dq}" = "xno" ]; then
		blacklistbu_add "skx"
	fi
}
|
|
|
|
try_assemble()
{
	#
	# Try to assemble one source file and report whether it succeeded.
	#
	# Arguments: $1 - compiler command (may consist of several words)
	#            $2 - extra compiler flags (may be empty or several words)
	#            $3 - path to the assembly source file
	# Outputs:   'yes' on stdout if the compile command exits with status
	#            zero, 'no' otherwise. All compiler output is discarded.
	# Notes:     the object file is named after the source file's base
	#            name and is written to, then removed from, the current
	#            working directory.
	#
	local cc cflags asm_src asm_base asm_bin rval

	cc="$1"
	cflags="$2"
	asm_src="$3"

	# Construct the filename to the .o file corresponding to asm_src.
	# (Strip the filepath, then the file extension, and then add ".o".)
	asm_base=${asm_src##*/}
	asm_base=${asm_base%.*}
	asm_bin="${asm_base}.o"

	# Try to assemble the file. cc and cflags are deliberately left
	# unquoted so that multi-word values split into separate arguments;
	# the file paths are quoted so that whitespace in them is preserved.
	if ${cc} ${cflags} -c "${asm_src}" -o "${asm_bin}" > /dev/null 2>&1; then
		rval='yes'
	else
		rval='no'
	fi

	# Remove the object file.
	rm -f -- "${asm_bin}"

	# Return the result.
	echo "${rval}"
}
|
|
|
|
set_default_version()
{
	#
	# Build the default version string from the contents of a version file.
	#
	# Arguments:       $1 - path to the version file
	# Globals read:    script_name
	# Globals written: version, set to
	#                  "AOCL-BLAS <file contents> Build <YYYYMMDD>"
	# Outputs:         one progress line on stdout.
	#
	# The path is kept in a local so that calling this function does not
	# overwrite a caller-level variable of the same name.
	local version_file

	# The path to the version file.
	version_file="$1"

	echo "${script_name}: determining default version string."

	# Use what's in the version file as-is.
	version="AOCL-BLAS $(cat "${version_file}") Build $(date +%Y%m%d)"
}
|
|
|
|
|
|
|
|
#
|
|
# -- main function -------------------------------------------------------------
|
|
#
|
|
|
|
main()
|
|
{
|
|
#declare -A config_registry
|
|
#declare -A kernel_registry
|
|
#declare -A kconfig_registry
|
|
|
|
# -- Basic names and paths --
|
|
|
|
# The name of the script, stripped of any preceeding path.
|
|
script_name=${0##*/}
|
|
|
|
# The path to the script. We need this to find the top-level directory
|
|
# of the source distribution in the event that the user has chosen to
|
|
# build elsewhere.
|
|
dist_path=${0%/${script_name}}
|
|
|
|
# The path to the directory in which we are building. We do this to
|
|
# make explicit that we distinguish between the top-level directory
|
|
# of the distribution and the directory in which we are building.
|
|
cur_dirpath="."
|
|
|
|
# The file in which the version string is kept.
|
|
version_file="version"
|
|
version_filepath="${dist_path}/${version_file}"
|
|
|
|
# The name of and path to the directory named "build" in the top-level
|
|
# directory of the source distribution.
|
|
build_dir='build'
|
|
build_dirpath="${dist_path}/${build_dir}"
|
|
|
|
# The name/path to the registry (master list) of supported configurations.
|
|
registry_file="config_registry"
|
|
registry_filepath=${dist_path}/${registry_file}
|
|
|
|
# The names/paths for the template config.mk.in and its instantiated
|
|
# counterpart.
|
|
config_mk_in='config.mk.in'
|
|
config_mk_out='config.mk'
|
|
config_mk_in_path="${build_dirpath}/${config_mk_in}"
|
|
config_mk_out_path="${cur_dirpath}/${config_mk_out}"
|
|
|
|
# The names/paths for the template bli_config.h.in and its instantiated
|
|
# counterpart.
|
|
bli_config_h_in='bli_config.h.in'
|
|
bli_config_h_out='bli_config.h'
|
|
bli_config_h_in_path="${build_dirpath}/${bli_config_h_in}"
|
|
bli_config_h_out_path="${cur_dirpath}/${bli_config_h_out}"
|
|
|
|
# The names/paths for the template bli_addon.h.in and its instantiated
|
|
# counterpart.
|
|
bli_addon_h_in='bli_addon.h.in'
|
|
bli_addon_h_out='bli_addon.h'
|
|
bli_addon_h_in_path="${build_dirpath}/${bli_addon_h_in}"
|
|
bli_addon_h_out_path="${cur_dirpath}/${bli_addon_h_out}"
|
|
|
|
# Path to 'mirror-tree.sh' script.
|
|
mirror_tree_sh="${build_dirpath}/mirror-tree.sh"
|
|
|
|
# Path to 'gen-make-frags.sh' script and directory.
|
|
gen_make_frags_dirpath="${build_dirpath}/gen-make-frags"
|
|
gen_make_frags_sh="${gen_make_frags_dirpath}/gen-make-frag.sh"
|
|
|
|
# The name of the (top-level) configuration directory.
|
|
config_dir='config'
|
|
config_dirpath="${dist_path}/${config_dir}"
|
|
|
|
# The name of the (top-level) kernels directory.
|
|
kernels_dir='kernels'
|
|
kernels_dirpath="${dist_path}/${kernels_dir}"
|
|
|
|
# The name of the (top-level) reference kernels directory.
|
|
refkern_dir='ref_kernels'
|
|
refkern_dirpath="${dist_path}/${refkern_dir}"
|
|
|
|
# The root directory of the BLIS framework.
|
|
frame_dir='frame'
|
|
frame_dirpath="${dist_path}/${frame_dir}"
|
|
|
|
# The root directory of the BLIS framework.
|
|
aocldtl_dir='aocl_dtl'
|
|
aocldtl_dirpath="${dist_path}/${aocldtl_dir}"
|
|
# The names of the addons.
|
|
addon_dir='addon'
|
|
addon_dirpath="${dist_path}/${addon_dir}"
|
|
|
|
# The name of the sandbox directory.
|
|
sandbox_dir='sandbox'
|
|
sandbox_dirpath="${dist_path}/${sandbox_dir}"
|
|
|
|
# The name of the directory in which object files will be kept.
|
|
obj_dir='obj'
|
|
obj_dirpath="${cur_dirpath}/${obj_dir}"
|
|
|
|
# The name of the directory in which libraries will be kept.
|
|
lib_dir='lib'
|
|
lib_dirpath="${cur_dirpath}/${lib_dir}"
|
|
|
|
# The name of the directory in which headers will be kept.
|
|
include_dir='include'
|
|
include_dirpath="${cur_dirpath}/${include_dir}"
|
|
|
|
# The name of the directory in which the BLAS test suite is kept.
|
|
blastest_dir='blastest'
|
|
|
|
# The name of the directory in which the BLIS test suite is kept.
|
|
testsuite_dir='testsuite'
|
|
|
|
# -- Version-related --
|
|
|
|
# The shared library (.so) version file.
|
|
so_version_file='so_version'
|
|
so_version_filepath="${dist_path}/${so_version_file}"
|
|
|
|
# The major and minor/build .so version numbers.
|
|
so_version_major=''
|
|
so_version_minorbuild=''
|
|
|
|
# -- configure options --
|
|
|
|
# Define the default prefix so that the print_usage() function can
|
|
# output it in the --help text.
|
|
prefix_def='/usr/local'
|
|
|
|
# The installation prefix, assigned its default value, and a flag to
|
|
# track whether or not it was given by the user.
|
|
prefix=${prefix_def}
|
|
prefix_flag=''
|
|
|
|
# The installation exec_prefix, assigned its default value, and a flag to
|
|
# track whether or not it was given by the user.
|
|
exec_prefix='${prefix}'
|
|
exec_prefix_flag=''
|
|
|
|
# The installation libdir, assigned its default value, and a flag to
|
|
# track whether or not it was given by the user.
|
|
libdir='${exec_prefix}/lib'
|
|
libdir_flag=''
|
|
|
|
# The installation includedir, assigned its default value, and a flag to
|
|
# track whether or not it was given by the user.
|
|
includedir='${prefix}/include'
|
|
includedir_flag=''
|
|
|
|
# The installation sharedir, assigned its default value, and a flag to
|
|
# track whether or not it was given by the user.
|
|
sharedir='${prefix}/share'
|
|
sharedir_flag=''
|
|
|
|
# The preset value of CFLAGS and LDFLAGS (ie: compiler and linker flags
|
|
# to use in addition to those determined by the build system).
|
|
cflags_preset=''
|
|
ldflags_preset=''
|
|
|
|
# The user-given debug type and a flag indicating it was given.
|
|
debug_type=''
|
|
debug_flag=''
|
|
|
|
# The system flag.
|
|
enable_system='yes'
|
|
|
|
# The threading flag.
|
|
threading_model='off'
|
|
|
|
# The method of assigning micropanels to threads in the JR and JR loops.
|
|
thread_part_jrir='slab'
|
|
|
|
# Option variables.
|
|
quiet_flag=''
|
|
show_config_list=''
|
|
|
|
# Additional flags.
|
|
enable_verbose='no'
|
|
enable_arg_max_hack='no'
|
|
enable_static='yes'
|
|
enable_shared='yes'
|
|
enable_rpath='no'
|
|
export_shared='public'
|
|
enable_pba_pools='yes'
|
|
enable_sba_pools='yes'
|
|
enable_mem_tracing='no'
|
|
int_type_size=0
|
|
blas_int_type_size=32
|
|
enable_blas='yes'
|
|
enable_cblas='no'
|
|
enable_mixed_dt='yes'
|
|
enable_mixed_dt_extra_mem='yes'
|
|
|
|
enable_mnk1_matrix='yes'
|
|
enable_tiny_matrix='yes'
|
|
enable_small_matrix='yes'
|
|
enable_sup_handling='yes'
|
|
enable_small_matrix_trsm='yes'
|
|
enable_trsm_preinversion='yes'
|
|
|
|
enable_memkind='' # The default memkind value is determined later on.
|
|
enable_aocl_dynamic='yes'
|
|
enable_security_flags='yes'
|
|
force_version='no'
|
|
complex_return='default'
|
|
disable_blis_arch_type='unset'
|
|
rename_blis_arch_type='BLIS_ARCH_TYPE'
|
|
rename_blis_model_type='BLIS_MODEL_TYPE'
|
|
|
|
# DTL tracing/logging flag.
|
|
enable_aocl_dtl='off'
|
|
aocl_dtl_trace_level_number='unset'
|
|
|
|
# The addon flag and names.
|
|
addon_flag=''
|
|
addon_list=''
|
|
|
|
# The sandbox flag and name.
|
|
sandbox_flag=''
|
|
sandbox=''
|
|
|
|
# -- Configuration registry --
|
|
|
|
# The name of the chosen configuration (the configuration "family").
|
|
config_name=''
|
|
|
|
# The list of sub-configurations associated with config_name.
|
|
config_list=''
|
|
|
|
# The list of kernel sets that will be needed by the sub-configurations
|
|
# in config_list..
|
|
kernel_list=''
|
|
|
|
# The list of kernel:sub-configuration pairs for all kernels contained
|
|
# in kernel_list.
|
|
kconfig_map=''
|
|
|
|
# -- Out-of-tree --
|
|
|
|
# Whether we are building out-of-tree.
|
|
configured_oot="no"
|
|
|
|
# Dummy file. Used to check whether the cwd is the same as the top-level
|
|
# source distribution directory.
|
|
dummy_file='_blis_dir_detect.tmp'
|
|
|
|
# -- Debugging --
|
|
|
|
# A global flag to help debug the compilation command for the executable
|
|
# that configure builds on-the-fly to perform hardware auto-detection.
|
|
debug_auto_detect="no"
|
|
|
|
|
|
|
|
# -- Command line option/argument parsing ----------------------------------
|
|
|
|
found=true
|
|
while $found = true; do
|
|
|
|
# Process our command line options.
|
|
unset OPTIND
|
|
while getopts ":hp:d:e:a:s:t:r:qci:b:-:" opt; do
|
|
case $opt in
|
|
-)
|
|
case "$OPTARG" in
|
|
help)
|
|
print_usage
|
|
;;
|
|
quiet)
|
|
quiet_flag=1
|
|
;;
|
|
prefix=*)
|
|
prefix_flag=1
|
|
prefix=${OPTARG#*=}
|
|
;;
|
|
exec-prefix=*)
|
|
exec_prefix_flag=1
|
|
exec_prefix=${OPTARG#*=}
|
|
;;
|
|
libdir=*)
|
|
libdir_flag=1
|
|
libdir=${OPTARG#*=}
|
|
;;
|
|
includedir=*)
|
|
includedir_flag=1
|
|
includedir=${OPTARG#*=}
|
|
;;
|
|
sharedir=*)
|
|
sharedir_flag=1
|
|
sharedir=${OPTARG#*=}
|
|
;;
|
|
enable-debug)
|
|
debug_flag=1
|
|
debug_type=noopt
|
|
;;
|
|
enable-debug=*)
|
|
debug_flag=1
|
|
debug_type=${OPTARG#*=}
|
|
;;
|
|
disable-debug)
|
|
debug_flag=0
|
|
;;
|
|
enable-verbose-make)
|
|
enable_verbose='yes'
|
|
;;
|
|
disable-verbose-make)
|
|
enable_verbose='no'
|
|
;;
|
|
enable-arg-max-hack)
|
|
enable_arg_max_hack='yes'
|
|
;;
|
|
disable-arg-max-hack)
|
|
enable_arg_max_hack='no'
|
|
;;
|
|
enable-static)
|
|
enable_static='yes'
|
|
;;
|
|
disable-static)
|
|
enable_static='no'
|
|
;;
|
|
enable-shared)
|
|
enable_shared='yes'
|
|
;;
|
|
disable-shared)
|
|
enable_shared='no'
|
|
;;
|
|
enable-rpath)
|
|
enable_rpath='yes'
|
|
;;
|
|
disable-rpath)
|
|
enable_rpath='no'
|
|
;;
|
|
export-shared=*)
|
|
export_shared=${OPTARG#*=}
|
|
;;
|
|
enable-system)
|
|
enable_system='yes'
|
|
;;
|
|
disable-system)
|
|
enable_system='no'
|
|
;;
|
|
enable-threading=*)
|
|
threading_model=${OPTARG#*=}
|
|
;;
|
|
disable-threading)
|
|
threading_model='off'
|
|
;;
|
|
thread-part-jrir=*)
|
|
thread_part_jrir=${OPTARG#*=}
|
|
;;
|
|
enable-pba-pools)
|
|
enable_pba_pools='yes'
|
|
;;
|
|
disable-pba-pools)
|
|
enable_pba_pools='no'
|
|
;;
|
|
enable-sba-pools)
|
|
enable_sba_pools='yes'
|
|
;;
|
|
disable-sba-pools)
|
|
enable_sba_pools='no'
|
|
;;
|
|
enable-mem-tracing)
|
|
enable_mem_tracing='yes'
|
|
;;
|
|
disable-mem-tracing)
|
|
enable_mem_tracing='no'
|
|
;;
|
|
enable-addon=*)
|
|
addon_flag=1
|
|
addon_name=${OPTARG#*=}
|
|
# Append the addon name to the list.
|
|
addon_list="${addon_list} ${addon_name}"
|
|
;;
|
|
disable-addon)
|
|
addon_flag=''
|
|
;;
|
|
enable-sandbox=*)
|
|
sandbox_flag=1
|
|
sandbox=${OPTARG#*=}
|
|
;;
|
|
disable-sandbox)
|
|
sandbox_flag=''
|
|
;;
|
|
int-size=*)
|
|
int_type_size=${OPTARG#*=}
|
|
;;
|
|
blas-int-size=*)
|
|
blas_int_type_size=${OPTARG#*=}
|
|
;;
|
|
enable-blas)
|
|
enable_blas='yes'
|
|
;;
|
|
disable-blas)
|
|
enable_blas='no'
|
|
;;
|
|
enable-cblas)
|
|
enable_cblas='yes'
|
|
;;
|
|
disable-cblas)
|
|
enable_cblas='no'
|
|
;;
|
|
enable-mixed-dt)
|
|
enable_mixed_dt='yes'
|
|
;;
|
|
disable-mixed-dt)
|
|
enable_mixed_dt='no'
|
|
;;
|
|
enable-mixed-dt-extra-mem)
|
|
enable_mixed_dt_extra_mem='yes'
|
|
;;
|
|
disable-mixed-dt-extra-mem)
|
|
enable_mixed_dt_extra_mem='no'
|
|
;;
|
|
enable-mnk1-matrix)
|
|
enable_mnk1_matrix='yes'
|
|
;;
|
|
disable-mnk1-matrix)
|
|
enable_mnk1_matrix='no'
|
|
;;
|
|
enable-tiny-matrix)
|
|
enable_tiny_matrix='yes'
|
|
;;
|
|
disable-tiny-matrix)
|
|
enable_tiny_matrix='no'
|
|
;;
|
|
enable-small-matrix)
|
|
enable_small_matrix='yes'
|
|
;;
|
|
disable-small-matrix)
|
|
enable_small_matrix='no'
|
|
;;
|
|
enable-sup-handling)
|
|
enable_sup_handling='yes'
|
|
;;
|
|
disable-sup-handling)
|
|
enable_sup_handling='no'
|
|
;;
|
|
enable-small-matrix-trsm)
|
|
enable_small_matrix_trsm='yes'
|
|
;;
|
|
disable-small-matrix-trsm)
|
|
enable_small_matrix_trsm='no'
|
|
;;
|
|
with-memkind)
|
|
enable_memkind='yes'
|
|
;;
|
|
without-memkind)
|
|
enable_memkind='no'
|
|
;;
|
|
enable-trsm-preinversion)
|
|
enable_trsm_preinversion='yes'
|
|
;;
|
|
disable-trsm-preinversion)
|
|
enable_trsm_preinversion='no'
|
|
;;
|
|
enable-aocl-dynamic)
|
|
enable_aocl_dynamic='yes'
|
|
;;
|
|
disable-aocl-dynamic)
|
|
enable_aocl_dynamic='no'
|
|
;;
|
|
enable-security-flags)
|
|
enable_security_flags='yes'
|
|
;;
|
|
disable-security-flags)
|
|
enable_security_flags='no'
|
|
;;
|
|
force-version=*)
|
|
force_version=${OPTARG#*=}
|
|
;;
|
|
show-config-list)
|
|
show_config_list=1
|
|
;;
|
|
complex-return=*)
|
|
complex_return=${OPTARG#*=}
|
|
;;
|
|
enable-blis-arch-type)
|
|
disable_blis_arch_type='no'
|
|
;;
|
|
disable-blis-arch-type)
|
|
disable_blis_arch_type='yes'
|
|
;;
|
|
rename-blis-arch-type=*)
|
|
rename_blis_arch_type=${OPTARG#*=}
|
|
;;
|
|
rename-blis-model-type=*)
|
|
rename_blis_model_type=${OPTARG#*=}
|
|
;;
|
|
enable-aocl-dtl=*)
|
|
enable_aocl_dtl=${OPTARG#*=}
|
|
;;
|
|
disable-aocl-dtl=*)
|
|
enable_aocl_dtl='off'
|
|
;;
|
|
aocl-dtl-trace-level=*)
|
|
aocl_dtl_trace_level_number=${OPTARG#*=}
|
|
;;
|
|
*)
|
|
print_usage
|
|
;;
|
|
esac;;
|
|
h)
|
|
print_usage
|
|
;;
|
|
p)
|
|
prefix_flag=1
|
|
prefix=$OPTARG
|
|
;;
|
|
d)
|
|
debug_flag=1
|
|
debug_type=$OPTARG
|
|
;;
|
|
e)
|
|
export_shared=$OPTARG
|
|
;;
|
|
a)
|
|
addon_flag=1
|
|
addon_name=$OPTARG
|
|
# Append the addon name to the list.
|
|
addon_list="${addon_list} ${addon_name}"
|
|
;;
|
|
s)
|
|
sandbox_flag=1
|
|
sandbox=$OPTARG
|
|
;;
|
|
q)
|
|
quiet_flag=1
|
|
;;
|
|
t)
|
|
threading_model=$OPTARG
|
|
;;
|
|
r)
|
|
thread_part_jrir=$OPTARG
|
|
;;
|
|
i)
|
|
int_type_size=$OPTARG
|
|
;;
|
|
b)
|
|
blas_int_type_size=$OPTARG
|
|
;;
|
|
c)
|
|
show_config_list=1
|
|
;;
|
|
\?)
|
|
print_usage
|
|
;;
|
|
esac
|
|
done
|
|
shift $(($OPTIND - 1))
|
|
|
|
# Parse environment variables
|
|
found=false
|
|
while [ $# -gt 0 ]; do
|
|
case $1 in
|
|
*=*)
|
|
var=`expr "$1" : '\([^=]*\)='`
|
|
value=`expr "$1" : '[^=]*=\(.*\)'`
|
|
eval $var=\$value
|
|
export $var
|
|
shift
|
|
found=true
|
|
;;
|
|
*)
|
|
break
|
|
;;
|
|
esac
|
|
done
|
|
done
|
|
|
|
|
|
# -- Check the operating system --------------------------------------------
|
|
|
|
os_name=$(uname -s)
|
|
os_vers=$(uname -r)
|
|
echo "${script_name}: detected ${os_name} kernel version ${os_vers}."
|
|
|
|
# Define a single variable off of which we can branch to tell if we are
|
|
# building for Windows.
|
|
is_win=no
|
|
if [[ $os_name == MSYS* ]] || \
|
|
[[ $os_name == MINGW* ]] || \
|
|
[[ $os_name == CYGWIN* ]] ; then
|
|
is_win=yes
|
|
fi
|
|
|
|
|
|
# -- Find a python interpreter ---------------------------------------------
|
|
|
|
# Acquire the python search order. This may vary based on the os found
|
|
# above.
|
|
python_search_list=$(get_python_search_list)
|
|
|
|
echo "${script_name}: python interpeter search list is: ${python_search_list}."
|
|
|
|
# Find a working python interpreter.
|
|
found_python=$(select_tool "${python_search_list}" "${PYTHON}")
|
|
|
|
# If we didn't find any working python interpreters, we print an error
|
|
# message.
|
|
if [ -z "${found_python}" ]; then
|
|
echo "${script_name}: *** Could not find working python interperter! Cannot continue."
|
|
exit 1
|
|
fi
|
|
|
|
echo "${script_name}: using '${found_python}' python interpreter."
|
|
|
|
|
|
# -- Check the python version ----------------------------------------------
|
|
|
|
# Check the python interpreter's version.
|
|
get_python_version
|
|
check_python
|
|
|
|
|
|
# -- Find a C compiler -----------------------------------------------------
|
|
|
|
# Acquire the compiler search order. This will vary based on the os found
|
|
# above.
|
|
cc_search_list=$(get_cc_search_list)
|
|
|
|
echo "${script_name}: C compiler search list is: ${cc_search_list}."
|
|
|
|
# Find a working C compiler.
|
|
found_cc=$(select_tool "${cc_search_list}" "${CC}")
|
|
|
|
# If we didn't find any working C compilers, we print an error message.
|
|
if [ -z "${found_cc}" ]; then
|
|
echo "${script_name}: *** Could not find working C compiler! Cannot continue."
|
|
exit 1
|
|
fi
|
|
|
|
echo "${script_name}: using '${found_cc}' C compiler."
|
|
|
|
# Also check the compiler to see if we are (cross-)compiling for Windows
|
|
if ${found_cc} -dM -E - < /dev/null 2> /dev/null | grep -q _WIN32; then
|
|
is_win=yes
|
|
fi
|
|
|
|
|
|
# -- Find a C++ compiler ---------------------------------------------------
|
|
|
|
# Acquire the compiler search order. This will vary based on the os
|
|
# found above.
|
|
cxx_search_list=$(get_cxx_search_list)
|
|
|
|
echo "${script_name}: C++ compiler search list is: ${cxx_search_list}."
|
|
|
|
# Find a working C++ compiler. NOTE: We can reuse the select_tool()
|
|
# function since it is written in a way that is general-purpose.
|
|
found_cxx=$(select_tool "${cxx_search_list}" "${CXX}")
|
|
|
|
# If we didn't find any working C++ compilers, we print an error message.
|
|
if [ -z "${found_cxx}" ]; then
|
|
echo "${script_name}: Could not find working C++ compiler! C++ will not be available in sandbox."
|
|
found_cxx="c++notfound"
|
|
fi
|
|
|
|
echo "${script_name}: using '${found_cxx}' C++ compiler (for sandbox only)."
|
|
|
|
|
|
# -- Check the compiler version --------------------------------------------
|
|
|
|
# Initialize the blacklist to empty.
|
|
blacklist_init
|
|
|
|
# Check the compiler's version. Certain versions of certain compilers
|
|
# will preclude building certain sub-configurations, which are added
|
|
# to a blacklist. We also make note of certain version ranges that
|
|
# will be useful to know about later.
|
|
get_compiler_version
|
|
check_compiler
|
|
check_compiler_version_ranges
|
|
|
|
# Now check the assembler's ability to assemble code. Older versions
|
|
# of binutils may not be aware of certain instruction sets. Those
|
|
# sub-configurations employing kernels that use such instruction sets
|
|
# will also be blacklisted.
|
|
get_binutils_version
|
|
check_assembler
|
|
|
|
# Remove duplicates and whitespace from the blacklist.
|
|
blacklist_cleanup
|
|
|
|
if [ -n "${config_blist}" ]; then
|
|
|
|
echo "${script_name}: configuration blacklist:"
|
|
echo "${script_name}: ${config_blist}"
|
|
fi
|
|
|
|
|
|
# -- Read the configuration registry ---------------------------------------
|
|
|
|
# Make sure the config registry file exists and can be opened.
|
|
if [ ! -f "${registry_filepath}" ]; then
|
|
|
|
echo "${script_name}: could not open '${registry_file}' file; cannot continue."
|
|
echo "${script_name}: BLIS distribution appears to be incomplete."
|
|
echo "${script_name}: *** Please verify source distribution."
|
|
|
|
exit 1
|
|
fi
|
|
|
|
# Read the registered configuration names and lists into associative
|
|
# arrays.
|
|
echo -n "${script_name}: reading configuration registry..."
|
|
read_registry_file ${registry_filepath}
|
|
echo "done."
|
|
|
|
# Report if additional configurations needed to be blacklisted.
|
|
# NOTE: This branch should never execute so long as indirect blacklisting
|
|
# is disabled. See comment regarding issue #214 in the definition of
|
|
# pass_config_kernel_registries().
|
|
if [ -n "${indirect_blist}" ]; then
|
|
echo "${script_name}: needed to indirectly blacklist additional configurations:"
|
|
echo "${script_name}: ${indirect_blist}"
|
|
fi
|
|
|
|
|
|
# -- Acquire the BLIS version ----------------------------------------------
|
|
|
|
# Set the 'version' variable to the default value (the 'git describe'
|
|
# augmented instance of whatever is in the 'version' file if this is a git
|
|
# clone, or whatever is in the 'version' file unmodified if it is a bare
|
|
# source release).
|
|
set_default_version "${version_filepath}"
|
|
|
|
# Initial message.
|
|
echo "${script_name}: starting configuration of BLIS ${version}."
|
|
|
|
# Check if the user requested a custom version string.
|
|
if [ "x${force_version}" = "xno" ]; then
|
|
echo "${script_name}: configuring with official version string."
|
|
else
|
|
echo "${script_name}: configuring with custom version string '${force_version}'."
|
|
version="${force_version}"
|
|
fi
|
|
|
|
|
|
# -- Acquire the shared library (.so) versions -----------------------------
|
|
|
|
# The first line of the 'so_version' file contains the .so major version.
|
|
so_version_major=$(cat ${so_version_filepath} | sed -n "1p")
|
|
|
|
# The second line contains the minor and build .so version numbers
|
|
# (separated by a '.').
|
|
so_version_minorbuild=$(cat ${so_version_filepath} | sed -n "2p")
|
|
|
|
echo "${script_name}: found shared library .so version '${so_version_major}.${so_version_minorbuild}'."
|
|
echo "${script_name}: .so major version: ${so_version_major}"
|
|
echo "${script_name}: .so minor.build version: ${so_version_minorbuild}"
|
|
|
|
|
|
# -- Various pre-configuration checks --------------------------------------
|
|
|
|
# Set config_name based on the number of arguments leftover (after command
|
|
# line option processing).
|
|
if [ $# = "0" ]; then
|
|
|
|
#configs_avail="auto "$(ls ${config_dirpath})
|
|
|
|
echo "${script_name}: "
|
|
echo "${script_name}: *** No configuration given! ***"
|
|
echo "${script_name}: "
|
|
echo "${script_name}: Default configuration behavior is not implemented (for your"
|
|
echo "${script_name}: own safety). Please re-run '${script_name}' and specify one"
|
|
echo "${script_name}: of the existing configurations in the source distribution's"
|
|
echo "${script_name} '${registry_file}' file:"
|
|
echo "${script_name}: "
|
|
#for k in "${!config_registry[@]}"; do
|
|
for cr_var in ${!config_registry_*}; do
|
|
|
|
#v=${config_registry[$k]}
|
|
k=${cr_var##config_registry_}; v=${!cr_var}
|
|
|
|
echo "${script_name}: $k (${v})"
|
|
done
|
|
echo "${script_name}: "
|
|
|
|
exit 1
|
|
|
|
elif [ $# != "1" ]; then # more than one configuration argument given.
|
|
|
|
print_usage
|
|
|
|
fi
|
|
|
|
# Determine config_name: either ask the hardware detection driver ("auto")
# or take the command line argument verbatim. The argument is quoted so
# that a value containing whitespace or glob characters cannot break the
# test expression.
if [ "$1" = "auto" ]; then

	echo "${script_name}: automatic configuration requested."

	# Call the auto_detect() function and save the returned string in
	# config_name.
	config_name=$(auto_detect)
	#config_name="generic"

	# Debugging stuff. When confirming the behavior of auto_detect(),
	# it is useful to output ${config_name}, which in theory could be
	# set temporarily to something other than the config_name, such as
	# the compilation command.
	if [ "${debug_auto_detect}" = "yes" ]; then
		echo "auto-detect program compilation command: ${config_name}"
		exit 1
	fi

	echo "${script_name}: hardware detection driver returned '${config_name}'."

	# If the auto-detect code returned the "generic" string, it means we
	# were unable to automatically detect the user's hardware type. While
	# this is going to be a rare event, it will likely lead the user to
	# experience much lower performance than expected, and thus we will
	# warn them about it at the end of the configure output (to increase
	# the chances that they see it).
	if [ "${config_name}" = "generic" ]; then
		warn_user_generic=1
	else
		warn_user_generic=0
	fi

else

	# Use the command line argument as the configuration name.
	config_name=$1

	echo "${script_name}: manual configuration requested; configuring with '${config_name}'."

fi
|
|
|
|
# Look up the list of sub-configurations and the list of kernel sets that
# are associated with the selected config name.
#config_list=${config_registry[${config_name}]}
#kernel_list=${kernel_registry[${config_name}]}
config_list=$(query_array "config_registry" ${config_name})
kernel_list=$(query_array "kernel_registry" ${config_name})

# Build a kconfig_registry for the selected config_name from the
# config_registry and kernel_registry.
build_kconfig_registry "${config_name}"

# When requested, dump the three registries, one "name: value" line per
# registry entry.
if [[ "${show_config_list}" == "1" ]]; then

	echo "${script_name}: configuration list:"
	#for k in "${!config_registry[@]}"; do
	for cr_var in ${!config_registry_*}; do
		#v=${config_registry[$k]}
		k=${cr_var##config_registry_}
		v=${!cr_var}
		echo "${script_name}: $k: ${v}"
	done

	echo "${script_name}: kernel list:"
	#for k in "${!kernel_registry[@]}"; do
	for kr_var in ${!kernel_registry_*}; do
		#v=${kernel_registry[$k]}
		k=${kr_var##kernel_registry_}
		v=${!kr_var}
		echo "${script_name}: $k: ${v}"
	done

	echo "${script_name}: kernel-to-config map for '${config_name}':"
	#for k in "${!kconfig_registry[@]}"; do
	for kc_var in ${!kconfig_registry_*}; do
		#v=${kconfig_registry[$k]}
		k=${kc_var##kconfig_registry_}
		v=${!kc_var}
		echo "${script_name}: $k: ${v}"
	done
fi
|
|
|
|
# For each kernel in the kernel list, reduce the list of associated
# sub-configurations (in the kconfig_registry) to a singleton using
# the following rules:
# 1. If the list is a singleton, use that name.
# 2. If the list contains a sub-configuration name that matches the
#    kernel name, use that name.
# 3. Otherwise, use the last name in the list.
# We use the chosen singleton to create a "kernel:subconfig" pair, which
# we accumulate into a list. This list is the kernel-to-config map, or
# kconfig_map.

# We use a sorted version of kernel_list so that it ends up matching the
# display order of the kconfig_registry above.
kernel_list_sort=$(echo ${kernel_list} | xargs -n1 | sort -u)

kconfig_map=""
for kernel in ${kernel_list_sort}; do

	#configs="${kconfig_registry[$kernel]}"
	configs=$(query_array "kconfig_registry" ${kernel})

	has_one_kernel=$(is_singleton "${configs}")
	contains_kernel=$(is_in_list "${kernel}" "${configs}")

	if [[ "${has_one_kernel}" == "true" ]]; then
		# Rule 1: the list is a singleton.
		reducedclist="${configs}"
	elif [[ "${contains_kernel}" == "true" ]]; then
		# Rule 2: the list contains a sub-config name that matches the
		# kernel.
		reducedclist="${kernel}"
	else
		# Rule 3: strip everything up to and including the final space,
		# which leaves the last name in the list.
		last_config=${configs##* }
		reducedclist="${last_config}"
	fi

	# Create a new "kernel:subconfig" pair and add it to the kconfig_map
	# list, removing whitespace.
	new_pair="${kernel}:${reducedclist}"
	kconfig_map=$(canonicalize_ws "${kconfig_map} ${new_pair}")
done

# When requested, print the pairs that were chosen above.
if [[ "${show_config_list}" == "1" ]]; then

	echo "${script_name}: kernel-to-config map for '${config_name}' (chosen pairs):"
	for k in ${kconfig_map}; do
		echo "${script_name}: $k"
	done
fi
|
|
|
|
|
|
echo "${script_name}: checking configuration against contents of '${registry_file}'."

# First, ensure that the config name is registered (ie: it is present
# in the config_registry file). An empty config_list means the lookup
# found nothing for config_name.
if [ -z "${config_list}" ]; then

	# NOTE: This branch should never execute when using auto-detection,
	# but we have it here just in case.
	# ($1 is quoted so that an argument containing whitespace cannot break
	# the test expression.)
	if [ "$1" = "auto" ]; then

		echo "${script_name}: 'auto-detected configuration '${config_name}' is NOT registered!"
		echo "${script_name}: "
		echo "${script_name}: *** Cannot continue with unregistered configuration '${config_name}'. ***"
		echo "${script_name}: "
		exit 1

	else

		# At this point, we know: (a) config_list is empty; and (b) the user
		# requested manual configuration. If the config_name given by the
		# user is present in the configuration blacklist (config_blist),
		# then we can deduce why the config_list is empty: because the only
		# subconfig implied by config_name is blacklisted. Thus, we cannot
		# proceed.
		# (The command substitution is quoted so that empty output from
		# is_in_list yields a clean "false" rather than a malformed test.)
		if [ "$(is_in_list "${config_name}" "${config_blist}")" == "true" ]; then

			echo "${script_name}: 'user-specified configuration '${config_name}' is blacklisted!"
			echo "${script_name}: "
			echo "${script_name}: *** Cannot continue with blacklisted configuration '${config_name}'. ***"
			echo "${script_name}: *** Try updating your compiler and/or assembler (binutils) versions. ***"
			echo "${script_name}: "
			exit 1

		else

			# If config_name is NOT present in config_blist, then we know
			# that config_list is empty simply because config_name is
			# unregistered.
			echo "${script_name}: 'user-specified configuration '${config_name}' is NOT registered!"
			echo "${script_name}: "
			echo "${script_name}: *** Cannot continue with unregistered configuration '${config_name}'. ***"
			echo "${script_name}: "
			exit 1

		fi
	fi

else

	# This branch executes when the configuration is found to be present
	# (i.e. registered) in the config_registry file.
	echo "${script_name}: configuration '${config_name}' is registered."
	echo "${script_name}: '${config_name}' is defined as having the following sub-configurations:"
	echo "${script_name}: ${config_list}"
	echo "${script_name}: which collectively require the following kernels:"
	echo "${script_name}: ${kernel_list}"

fi
|
|
|
|
# Based on the number of sub-configurations, set default value for disable_blis_arch_type
# (if user hasn't set option). BLIS_ARCH_TYPE functionality only makes sense for use with
# processor families containing multiple sub-configurations, but user can force the
# functionality to be enabled/disabled with --enable-blis-arch-type/--disable-blis-arch-type
# configure options.
if [ "x${disable_blis_arch_type}" = "xunset" ]; then
	# Some wc implementations pad their output with leading blanks. Passing
	# the count through arithmetic expansion normalizes it to a bare
	# integer so that the numeric comparison below is reliable.
	config_list_count=$(( $(echo ${config_list} | wc -w) ))
	if [ "${config_list_count}" -eq 1 ]; then
		disable_blis_arch_type='yes'
	else
		disable_blis_arch_type='no'
	fi
fi
|
|
|
|
echo "${script_name}: checking sub-configurations:"

# Verify that every constituent configuration associated with the config
# name is both registered and backed by a directory.
for conf in ${config_list}; do

	# Look the sub-configuration up in the registry. An empty result means
	# it was never entered into the config_registry, i.e. it is
	# unregistered.
	#this_clist=${config_registry[${conf}]}
	this_clist=$(query_array "config_registry" ${conf})

	if [[ -n "${this_clist}" ]]; then
		# No trailing newline: the directory check below finishes the line.
		echo -n "${script_name}: '${conf}' is registered."
	else
		echo "${script_name}: '${conf}' is NOT registered!"
		echo "${script_name}: "
		echo "${script_name}: *** Cannot continue with unregistered configuration '${conf}'. ***"
		echo "${script_name}: "
		exit 1
	fi

	# Confirm that the sub-configuration directory exists.
	if [[ -d "${config_dirpath}/${conf}" ]]; then
		echo "..and exists."
	else
		echo "..but does NOT exist!"
		echo "${script_name}: "
		echo "${script_name}: *** Cannot continue with nonexistent configuration '${conf}'. ***"
		echo "${script_name}: "
		exit 1
	fi
done
|
|
|
|
|
|
echo "${script_name}: checking sub-configurations' requisite kernels:"

# Verify that each requisite kernel set associated with the config name
# corresponds to a directory that exists.
for kernel in ${kernel_list}; do

	# No trailing newline: the verdict below finishes the line.
	echo -n "${script_name}: '${kernel}' kernels..."

	if [[ -d "${kernels_dirpath}/${kernel}" ]]; then
		echo "exist."
	else
		echo "do NOT exist!"
		echo "${script_name}: "
		echo "${script_name}: *** Cannot continue with nonexistent kernel '${kernel}'. ***"
		echo "${script_name}: "
		exit 1
	fi
done

# In order to determine the default behavior of the --with[out]-memkind
# option, we try to detect whether libmemkind is available. If it is,
# the default implied option will be --with-memkind; otherwise, will be
# --without-memkind.
has_memkind=$(has_libmemkind)

# Try to determine whether the chosen compiler supports #pragma omp simd.
pragma_omp_simd=$(has_pragma_omp_simd)
|
|
|
|
|
|
# -- Prepare variables for substitution into template files ----------------

# For each install directory option, report whether it was given on the
# command line or whether a default is being used.

# prefix
if [[ -z "${prefix_flag}" ]]; then
	echo "${script_name}: no install prefix option given; defaulting to '${prefix}'."
else
	echo "${script_name}: detected --prefix='${prefix}'."
fi

# exec_prefix
if [[ -z "${exec_prefix_flag}" ]]; then
	echo "${script_name}: no install exec_prefix option given; defaulting to PREFIX."
else
	echo "${script_name}: detected --exec-prefix='${exec_prefix}'."
fi

# libdir
if [[ -z "${libdir_flag}" ]]; then
	echo "${script_name}: no install libdir option given; defaulting to EXECPREFIX/lib."
else
	echo "${script_name}: detected --libdir='${libdir}'."
fi

# includedir
if [[ -z "${includedir_flag}" ]]; then
	echo "${script_name}: no install includedir option given; defaulting to PREFIX/include."
else
	echo "${script_name}: detected --includedir='${includedir}'."
fi

# sharedir
if [[ -z "${sharedir_flag}" ]]; then
	echo "${script_name}: no install sharedir option given; defaulting to PREFIX/share."
else
	echo "${script_name}: detected --sharedir='${sharedir}'."
fi

# Echo the installation directories that we settled on.
echo "${script_name}: final installation directories:"
echo "${script_name}: prefix: "${prefix}
echo "${script_name}: exec_prefix: "${exec_prefix}
echo "${script_name}: libdir: "${libdir}
echo "${script_name}: includedir: "${includedir}
echo "${script_name}: sharedir: "${sharedir}
echo "${script_name}: NOTE: the variables above can be overridden when running make."
|
|
|
|
# Capture any CFLAGS preset in the environment.
if [[ -z "${CFLAGS}" ]]; then
	cflags_preset=''
	echo "${script_name}: no preset CFLAGS detected."
else
	cflags_preset="${CFLAGS}"
	echo "${script_name}: detected preset CFLAGS; prepending:"
	echo "${script_name}: ${cflags_preset}"
fi

# Capture any LDFLAGS preset in the environment.
if [[ -z "${LDFLAGS}" ]]; then
	ldflags_preset=''
	echo "${script_name}: no preset LDFLAGS detected."
else
	ldflags_preset="${LDFLAGS}"
	echo "${script_name}: detected preset LDFLAGS; prepending:"
	echo "${script_name}: ${ldflags_preset}"
fi
|
|
|
|
# Standardize debug_type according to whether (and how) the debug flag was
# specified: 'opt', 'sde', 'noopt' (any other value), or 'off' (no flag).
if [[ -z "${debug_flag}" ]]; then
	debug_type='off'
	echo "${script_name}: debug symbols disabled."
else
	case "${debug_type}" in
		opt)
			echo "${script_name}: enabling debug symbols with optimizations."
			;;
		sde)
			debug_type='sde'
			echo "${script_name}: enabling SDE processor emulation."
			;;
		*)
			debug_type='noopt'
			echo "${script_name}: enabling debug symbols; optimizations disabled."
			;;
	esac
fi
|
|
|
|
# Report whether verbose make output was requested.
if [[ "${enable_verbose}" == "yes" ]]; then
	echo "${script_name}: enabling verbose make output. (disable with 'make V=0'.)"
else
	echo "${script_name}: disabling verbose make output. (enable with 'make V=1'.)"
fi

# Report whether the ARG_MAX hack was requested.
if [[ "${enable_arg_max_hack}" == "yes" ]]; then
	echo "${script_name}: enabling ARG_MAX hack."
else
	echo "${script_name}: disabling ARG_MAX hack."
fi
|
|
|
|
# Report which library types will be built. enable_shared_01 is cleared
# only when the shared library is explicitly disabled; at least one of the
# two library types must be enabled.
enable_shared_01=1
case "${enable_static}:${enable_shared}" in
	yes:yes)
		echo "${script_name}: building BLIS as both static and shared libraries."
		;;
	yes:no)
		echo "${script_name}: building BLIS as a static library (shared library disabled)."
		enable_shared_01=0
		;;
	no:yes)
		echo "${script_name}: building BLIS as a shared library (static library disabled)."
		;;
	*)
		echo "${script_name}: Both static and shared libraries were disabled."
		echo "${script_name}: *** Please enable one (or both) to continue."
		exit 1
		;;
esac
|
|
|
|
# Validate the "export shared" setting, which must be 'all' or 'public',
# and report its effect.
case "${export_shared}" in
	all)
		if [[ "${enable_shared}" == "yes" ]]; then
			echo "${script_name}: exporting all symbols within shared library."
		else
			echo "${script_name}: ignoring request to export all symbols within shared library."
		fi
		;;
	public)
		if [[ "${enable_shared}" == "yes" ]]; then
			echo "${script_name}: exporting only public symbols within shared library."
		fi
		;;
	*)
		echo "${script_name}: *** Invalid argument '${export_shared}' to --export-shared option given."
		echo "${script_name}: *** Please use 'public' or 'all'."
		exit 1
		;;
esac
|
|
|
|
# Determine whether we are building with or without operating system
# support. Without it, threading is forced off.
if [[ "${enable_system}" != "yes" ]]; then
	echo "${script_name}: disabling operating system support."
	echo "${script_name}: WARNING: all threading will be disabled!"
	enable_system_01=0

	# Force threading to be disabled.
	threading_model='off'
else
	echo "${script_name}: enabling operating system support."
	enable_system_01=1
fi
|
|
|
|
# Check the threading model flag and standardize its value, if needed.
# NOTE: 'omp' is deprecated but still supported; 'openmp' is preferred.
enable_openmp='no'
enable_openmp_01=0
enable_pthreads='no'
enable_pthreads_01=0
case "${threading_model}" in
	auto)
		echo "${script_name}: determining the threading model automatically."
		;;
	openmp | omp)
		echo "${script_name}: using OpenMP for threading."
		enable_openmp='yes'
		enable_openmp_01=1
		threading_model="openmp" # Standardize the value.
		;;
	pthreads | pthread | posix)
		echo "${script_name}: using POSIX threads for threading."
		enable_pthreads='yes'
		enable_pthreads_01=1
		threading_model="pthreads" # Standardize the value.
		;;
	off | no | none)
		echo "${script_name}: threading is disabled."
		threading_model="off"
		;;
	*)
		echo "${script_name}: *** Unsupported threading model: ${threading_model}."
		exit 1
		;;
esac
|
|
|
|
# Check the method of assigning micropanels to threads in the JR and IR
# loops. Exactly one of enable_jrir_slab_01 / enable_jrir_rr_01 is set to 1;
# any value other than 'slab' or 'rr' is rejected.
enable_jrir_slab_01=0
enable_jrir_rr_01=0
if [ "x${thread_part_jrir}" = "xslab" ]; then
	echo "${script_name}: requesting slab threading in jr and ir loops."
	enable_jrir_slab_01=1
elif [ "x${thread_part_jrir}" = "xrr" ]; then
	echo "${script_name}: requesting round-robin threading in jr and ir loops."
	enable_jrir_rr_01=1
else
	# Report the offending partitioning method itself (not the threading
	# model) so that the user can see which value was rejected.
	echo "${script_name}: *** Unsupported method of thread partitioning in jr and ir loops: ${thread_part_jrir}."
	exit 1
fi
|
|
|
|
# Convert 'yes' and 'no' flags to booleans (any value other than 'yes' is
# treated as 'no').

# Memory pools for packing blocks.
if [[ "${enable_pba_pools}" == "yes" ]]; then
	enable_pba_pools_01=1
	echo "${script_name}: internal memory pools for packing blocks are enabled."
else
	enable_pba_pools_01=0
	echo "${script_name}: internal memory pools for packing blocks are disabled."
fi

# Memory pools for small blocks.
if [[ "${enable_sba_pools}" == "yes" ]]; then
	enable_sba_pools_01=1
	echo "${script_name}: internal memory pools for small blocks are enabled."
else
	enable_sba_pools_01=0
	echo "${script_name}: internal memory pools for small blocks are disabled."
fi

# Memory tracing output.
if [[ "${enable_mem_tracing}" == "yes" ]]; then
	enable_mem_tracing_01=1
	echo "${script_name}: memory tracing output is enabled."
else
	enable_mem_tracing_01=0
	echo "${script_name}: memory tracing output is disabled."
fi
|
|
# Settle on whether libmemkind is used, combining the detection result
# (has_memkind) with any explicit user request (enable_memkind).
if [[ "${has_memkind}" != "yes" ]]; then
	echo "${script_name}: libmemkind not found; disabling."
	if [[ "${enable_memkind}" == "yes" ]]; then
		echo "${script_name}: cannot honor explicit request to enable libmemkind."
	fi
	enable_memkind="no"
	enable_memkind_01=0
elif [[ -z "${enable_memkind}" ]]; then
	# If no explicit option was given for libmemkind one way or the other,
	# we use the value returned previously by has_libmemkind(), in this
	# case "yes", to determine the default.
	echo "${script_name}: libmemkind found; default is to enable use."
	enable_memkind="yes"
	enable_memkind_01=1
elif [[ "${enable_memkind}" == "yes" ]]; then
	echo "${script_name}: received explicit request to enable libmemkind."
	enable_memkind="yes"
	enable_memkind_01=1
else
	echo "${script_name}: received explicit request to disable libmemkind."
	enable_memkind="no"
	enable_memkind_01=0
fi
|
|
# Record whether the compiler probe reported support for #pragma omp simd.
if [[ "${pragma_omp_simd}" == "yes" ]]; then
	enable_pragma_omp_simd_01=1
	echo "${script_name}: compiler appears to support #pragma omp simd."
else
	enable_pragma_omp_simd_01=0
	echo "${script_name}: compiler appears to not support #pragma omp simd."
fi

# CBLAS compatibility layer. This must be handled before the BLAS layer
# below because enabling CBLAS forces the BLAS layer on.
if [[ "${enable_cblas}" == "yes" ]]; then
	echo "${script_name}: the CBLAS compatibility layer is enabled."
	enable_cblas_01=1
	# Force BLAS layer when CBLAS is enabled
	enable_blas='yes'
else
	echo "${script_name}: the CBLAS compatibility layer is disabled."
	enable_cblas_01=0
fi

# BLAS compatibility layer.
if [[ "${enable_blas}" == "yes" ]]; then
	enable_blas_01=1
	echo "${script_name}: the BLAS compatibility layer is enabled."
else
	enable_blas_01=0
	echo "${script_name}: the BLAS compatibility layer is disabled."
fi
|
|
# Mixed datatype support. The extra-memory optimizations are only
# considered when mixed datatype support itself is enabled.
if [[ "${enable_mixed_dt}" != "yes" ]]; then
	echo "${script_name}: mixed datatype support is disabled."

	enable_mixed_dt_extra_mem_01=0
	enable_mixed_dt_01=0
else
	echo "${script_name}: mixed datatype support is enabled."

	if [[ "${enable_mixed_dt_extra_mem}" == "yes" ]]; then
		echo "${script_name}: mixed datatype optimizations requiring extra memory are enabled."
		enable_mixed_dt_extra_mem_01=1
	else
		echo "${script_name}: mixed datatype optimizations requiring extra memory are disabled."
		enable_mixed_dt_extra_mem_01=0
	fi

	enable_mixed_dt_01=1
fi
|
|
# Convert the matrix-handling 'yes'/'no' options to booleans, reporting
# each one (any value other than 'yes' is treated as disabled).

# M,N,K=1 matrix handling.
if [[ "${enable_mnk1_matrix}" == "yes" ]]; then
	enable_mnk1_matrix_01=1
	echo "${script_name}: M,N,K=1 matrix handling is enabled."
else
	enable_mnk1_matrix_01=0
	echo "${script_name}: M,N,K=1 matrix handling is disabled."
fi

# Tiny matrix handling.
if [[ "${enable_tiny_matrix}" == "yes" ]]; then
	enable_tiny_matrix_01=1
	echo "${script_name}: tiny matrix handling is enabled."
else
	enable_tiny_matrix_01=0
	echo "${script_name}: tiny matrix handling is disabled."
fi

# Small matrix handling.
if [[ "${enable_small_matrix}" == "yes" ]]; then
	enable_small_matrix_01=1
	echo "${script_name}: small matrix handling is enabled."
else
	enable_small_matrix_01=0
	echo "${script_name}: small matrix handling is disabled."
fi

# SUP matrix handling.
if [[ "${enable_sup_handling}" == "yes" ]]; then
	enable_sup_handling_01=1
	echo "${script_name}: SUP matrix handling is enabled."
else
	enable_sup_handling_01=0
	echo "${script_name}: SUP matrix handling is disabled."
fi

# TRSM small matrix handling.
if [[ "${enable_small_matrix_trsm}" == "yes" ]]; then
	enable_small_matrix_trsm_01=1
	echo "${script_name}: TRSM small matrix handling is enabled."
else
	enable_small_matrix_trsm_01=0
	echo "${script_name}: TRSM small matrix handling is disabled."
fi

# trsm diagonal element pre-inversion.
if [[ "${enable_trsm_preinversion}" == "yes" ]]; then
	enable_trsm_preinversion_01=1
	echo "${script_name}: trsm diagonal element pre-inversion is enabled."
else
	enable_trsm_preinversion_01=0
	echo "${script_name}: trsm diagonal element pre-inversion is disabled."
fi

# Security hardening flags (reported only; no boolean is set here).
if [[ "${enable_security_flags}" == "yes" ]]; then
	echo "${script_name}: security hardening flags are enabled."
else
	echo "${script_name}: security hardening flags are disabled."
fi
|
|
|
|
# Translate the AOCL DTL setting ('trace', 'log' or 'all') into the two
# trace/log booleans. Any other value leaves both disabled.
enable_aocl_dtl_trace_01=0
enable_aocl_dtl_log_01=0
case "${enable_aocl_dtl}" in
	trace)
		enable_aocl_dtl_trace_01=1
		;;
	log)
		enable_aocl_dtl_log_01=1
		;;
	all)
		enable_aocl_dtl_trace_01=1
		enable_aocl_dtl_log_01=1
		;;
esac

# Fall back to trace level 5 when no level was given.
if [[ "${aocl_dtl_trace_level_number}" == "unset" ]]; then
	aocl_dtl_trace_level_number=5
fi
|
|
|
|
# Check aocl dynamic threading configuration and enable it only if
# multi-threading is enabled.
if [[ "${enable_aocl_dynamic}" != "yes" ]]; then
	echo "${script_name}: dynamic selection of number of threads is disabled"
	enable_aocl_dynamic_01=0
elif [[ "${threading_model}" == "off" ]]; then
	# Requested, but there is no threading to manage: override the request.
	enable_aocl_dynamic_01=0
	enable_aocl_dynamic="no"
	echo "${script_name}: dynamic threading is disabled as multithreading is disabled"
else
	echo "${script_name}: dynamic selection of number of threads is enabled"
	enable_aocl_dynamic_01=1
fi
|
|
|
|
# Report the integer size used by the BLIS API.
case "${int_type_size}" in
	32) echo "${script_name}: the BLIS API integer size is 32-bit." ;;
	64) echo "${script_name}: the BLIS API integer size is 64-bit." ;;
	*)  echo "${script_name}: the BLIS API integer size is automatically determined." ;;
esac

# Report the integer size used by the BLAS/CBLAS API.
case "${blas_int_type_size}" in
	32) echo "${script_name}: the BLAS/CBLAS API integer size is 32-bit." ;;
	64) echo "${script_name}: the BLAS/CBLAS API integer size is 64-bit." ;;
	*)  echo "${script_name}: the BLAS/CBLAS API integer size is automatically determined." ;;
esac

# Disallow the simultaneous use of 64-bit integers in the BLAS and
# 32-bit integers in BLIS.
if [[ "${blas_int_type_size}" == "64" && "${int_type_size}" == "32" ]]; then
	echo "${script_name}: *** To avoid the possibility of truncation, we do not allow use of 64-bit integers in the BLAS API with 32-bit integers in BLIS. Please use a different configuration of integers."
	exit 1
fi
|
|
|
|
# Check if addons were given. On exit, addon_list holds only the addons that
# will actually be built and enable_addons_01 is 1 if (and only if) that
# list is non-empty.
if [ -n "${addon_flag}" ]; then

	# Remove duplicates in the addon list, if they exist.
	addon_list=$(rm_duplicate_words_simple "${addon_list}")

	# Drop every addon whose compiler version requirement is not met. This
	# happens before anything is reported so that the summary below reflects
	# the addons that remain.
	new_addon_list=""
	for addon in ${addon_list}; do

		# Check if this is aocl_gemm addon and verify compiler version
		if [ "${addon}" = "aocl_gemm" ]; then

			if [ "${cc_vendor}" = "gcc" ]; then

				# aocl_gemm addon (LPGEMM) requires GCC 11.2 or newer.
				if [ "${cc_major}" -lt 11 ] ||
				   { [ "${cc_major}" -eq 11 ] && [ "${cc_minor}" -lt 2 ]; }; then
					echo "${script_name}: warning: aocl_gemm addon requires GCC 11.2 or newer."
					echo "${script_name}: warning: Current GCC version is ${cc_version}."
					echo "${script_name}: warning: Skipping aocl_gemm addon."
					continue
				fi

			elif [ "${cc_vendor}" = "clang" ]; then

				# aocl_gemm addon (LPGEMM) requires Clang 12.0 or newer.
				if [ "${cc_major}" -lt 12 ]; then
					echo "${script_name}: warning: aocl_gemm addon requires Clang 12.0 or newer."
					echo "${script_name}: warning: Current Clang version is ${cc_version}."
					echo "${script_name}: warning: Skipping aocl_gemm addon."
					continue
				fi

			fi
		fi

		new_addon_list="${new_addon_list} ${addon}"
	done
	addon_list="${new_addon_list}"

	if [ -z "${addon_list}" ]; then

		# Every requested addon was skipped (or none was named), so
		# configure exactly as if no addon had been requested rather than
		# enabling addon support with an empty addon list.
		echo "${script_name}: configuring with no addons."

		enable_addons_01=0

	else

		echo "${script_name}: configuring with addons:"

		for addon in ${addon_list}; do

			echo "${script_name}: ${addon_dir}/${addon}"

			addon_fullpath="${addon_dirpath}/${addon}"

			if [ ! -d "${addon_fullpath}" ]; then
				echo "${script_name}: requested addon sub-directory does not exist! Cannot continue."
				echo "${script_name}: *** Please verify addon existence and name."
				exit 1
			fi
		done

		enable_addons_01=1
	fi

else

	echo "${script_name}: configuring with no addons."

	enable_addons_01=0

fi
|
|
|
|
# Check if a sandbox was given. A requested sandbox must exist as a
# sub-directory of the sandbox directory.
if [[ -z "${sandbox_flag}" ]]; then

	echo "${script_name}: configuring for conventional gemm implementation."

	enable_sandbox_01=0

else

	#sandbox_relpath="${sandbox_dir}/${sandbox}"

	echo "${script_name}: configuring for alternate gemm implementation:"
	echo "${script_name}: ${sandbox_dir}/${sandbox}"

	sandbox_fullpath="${sandbox_dirpath}/${sandbox}"

	if [[ ! -d "${sandbox_fullpath}" ]]; then
		echo "${script_name}: requested sandbox sub-directory does not exist! Cannot continue."
		echo "${script_name}: *** Please verify sandbox existence and name."
		exit 1
	fi

	enable_sandbox_01=1

fi
|
|
|
|
# Check the method used for returning complex numbers. When it was left at
# 'default', derive it from the Fortran compiler named by FC (if any),
# falling back to 'gnu'.
if [ "x${complex_return}" = "xdefault" ]; then
	if [ -n "${FC}" ]; then
		# Determine the complex return type from the given Fortran compiler

		# Query the full vendor version string output. This includes the
		# version number along with (potentially) a bunch of other textual
		# clutter.
		# NOTE: This maybe should use merged stdout/stderr rather than only
		# stdout. But it works for now.
		vendor_string="$(${FC} --version 2>/dev/null)"

		# Query the compiler "vendor" (ie: the compiler's simple name).
		# 'grep -E' is used instead of the obsolescent 'egrep'. The trailing
		# { read ...; echo ...; } keeps only the first word of the first
		# match, so that several matches still yield a single vendor name.
		fc_vendor=$(echo "${vendor_string}" | grep -E -o 'ifort|GNU' | { read first rest ; echo "${first}" ; })

		if [ "x${fc_vendor}" = "xifort" ]; then
			complex_return='intel'
		elif [ "x${fc_vendor}" = "xGNU" ]; then
			complex_return='gnu'
		else
			echo "${script_name}: unable to determine Fortran compiler vendor!"
			complex_return='gnu'
		fi
	else
		complex_return='gnu'
	fi
fi
|
|
|
|
# Translate the complex return type into a 0/1 flag ('1' selects the intel
# convention); reject anything other than 'gnu' or 'intel'.
case "${complex_return}" in
	gnu)
		complex_return_intel01='0'
		;;
	intel)
		complex_return_intel01='1'
		;;
	*)
		echo "${script_name}: unknown complex return type \"${complex_return}\"! Cannot continue."
		echo "${script_name}: *** Acceptable values are \"gnu\" and \"intel\"."
		exit 1
		;;
esac
|
|
|
|
# Convert disable_blis_arch_type to a 0/1 flag, reporting only when the
# functionality is disabled.
if [[ "${disable_blis_arch_type}" == "yes" ]]; then
	echo "${script_name}: user selection of code path using AOCL_ENABLE_INSTRUCTIONS,"
	echo "${script_name}: BLIS_ARCH_TYPE and BLIS_MODEL_TYPE env vars is disabled."
	disable_blis_arch_type_01='1'
else
	disable_blis_arch_type_01='0'
fi

# Report a custom env var name that replaces BLIS_ARCH_TYPE, if one was
# requested.
if [[ "${rename_blis_arch_type}" != "BLIS_ARCH_TYPE" ]]; then
	echo "${script_name}: configuring with BLIS_ARCH_TYPE env var renamed to '${rename_blis_arch_type}'."
fi

# Report a custom env var name that replaces BLIS_MODEL_TYPE, if one was
# requested.
if [[ "${rename_blis_model_type}" != "BLIS_MODEL_TYPE" ]]; then
	echo "${script_name}: configuring with BLIS_MODEL_TYPE env var renamed to '${rename_blis_model_type}'."
fi

echo "${script_name}: configuring complex return type as \"${complex_return}\"."
|
|
|
|
# Variables that may contain forward slashes, such as paths, need extra
# escaping when used in sed commands. Each '/' is rewritten as '\/' here
# so that the sed commands below do the right thing. ('|' is used as the
# sed delimiter so the expression itself needs no slash escaping.)
os_name_esc=$(echo "${os_name}" | sed 's|/|\\/|g')
prefix_esc=$(echo "${prefix}" | sed 's|/|\\/|g')
exec_prefix_esc=$(echo "${exec_prefix}" | sed 's|/|\\/|g')
libdir_esc=$(echo "${libdir}" | sed 's|/|\\/|g')
includedir_esc=$(echo "${includedir}" | sed 's|/|\\/|g')
sharedir_esc=$(echo "${sharedir}" | sed 's|/|\\/|g')
dist_path_esc=$(echo "${dist_path}" | sed 's|/|\\/|g')
cc_esc=$(echo "${found_cc}" | sed 's|/|\\/|g')
cxx_esc=$(echo "${found_cxx}" | sed 's|/|\\/|g')
python_esc=$(echo "${found_python}" | sed 's|/|\\/|g')
#sandbox_relpath_esc=$(echo "${sandbox_relpath}" | sed 's/\//\\\//g')

# For RANLIB, if the variable is not set, we use a default value of
# 'ranlib'.
ranlib_esc=$(echo "${RANLIB:-ranlib}" | sed 's|/|\\/|g')
# For AR, if the variable is not set, we use a default value of 'ar'.
ar_esc=$(echo "${AR:-ar}" | sed 's|/|\\/|g')
# For LIBPTHREAD, '-lpthread' is used only when the variable is unset; a
# variable that is set but empty is kept as empty.
libpthread_esc=$(echo "${LIBPTHREAD--lpthread}" | sed 's|/|\\/|g')
cflags_preset_esc=$(echo "${cflags_preset}" | sed 's|/|\\/|g')
ldflags_preset_esc=$(echo "${ldflags_preset}" | sed 's|/|\\/|g')

# For Windows builds, clear the libpthread_esc variable so that
# no pthreads library is substituted into config.mk. (Windows builds
# employ an implementation of pthreads that is internal to BLIS.)
if [ "$is_win" = "yes" ] && [ "$cc_vendor" = "clang" ]; then
	libpthread_esc=
fi

# We also clear the libpthread_esc variable for systemless builds
# (--disable-system).
if [ "$enable_system" = "no" ]; then
	libpthread_esc=
fi

# Typically, there are no slashes in the version variable. However,
# downstream maintainers (such as those for Debian) may create custom
# tags in their local clones such as "upstream/0.4.1", which obviously
# contain slashes. This line, and subsequent use of the escaped variable
# for the version string, accommodates those use cases.
version_esc=$(echo "${version}" | sed 's|/|\\/|g')

# Create a #define for the configuration family: the config_name is
# upper-cased and appended to BLIS_FAMILY_.
uconf=$(echo ${config_name} | tr '[:lower:]' '[:upper:]')
config_name_define="#define BLIS_FAMILY_${uconf}\n"
|
|
|
|
# Create an AOCL-specific #define.
|
|
# This macro is enabled only for zen family configurations.
|
|
# This enables us to use different cache block sizes for TRSM instead of common level-3 block sizes.
|
|
# Note: amd64_legacy is for pre-zen architectures.
|
|
uconf=$(echo ${config_name} | grep -v amd64_legacy |grep -c 'zen\|amd64\|x86_64' | cut -d. -f1)
|
|
if [[ $uconf == 1 ]]; then
|
|
enable_aocl_zen='yes'
|
|
enable_aocl_zen_01=1
|
|
else
|
|
enable_aocl_zen='no'
|
|
enable_aocl_zen_01=0;
|
|
fi
|
|
|
|
# Create a list of #defines, one for each configuration in config_list.
|
|
config_list_defines=""
|
|
for conf in ${config_list}; do
|
|
|
|
# Convert the current config name to uppercase.
|
|
uconf=$(echo ${conf} | tr '[:lower:]' '[:upper:]')
|
|
|
|
# Create a #define and add it to the running list.
|
|
config_define="BLIS_CONFIG_${uconf}"
|
|
config_list_defines="${config_list_defines}#define ${config_define}\n"
|
|
done
|
|
|
|
# Create a list of #defines, one for each kernel set in kernel_list.
|
|
kernel_list_defines=""
|
|
for kern in ${kernel_list}; do
|
|
|
|
# Convert the current kernel set name to uppercase.
|
|
uconf=$(echo ${kern} | tr '[:lower:]' '[:upper:]')
|
|
|
|
# Create a #define and add it to the running list.
|
|
kernel_define="BLIS_KERNELS_${uconf}"
|
|
kernel_list_defines="${kernel_list_defines}#define ${kernel_define}\n"
|
|
done
|
|
|
|
# Create a list of #includes, one for each addon in addon_list.
|
|
addon_list_includes=""
|
|
for addon in ${addon_list}; do
|
|
|
|
# Create an #include directive and add it to the running list.
|
|
addon_header="\"${addon}.h\""
|
|
addon_list_includes="${addon_list_includes}#include ${addon_header}\n"
|
|
done
|
|
|
|
|
|
# -- Determine whether we are performing an out-of-tree build --------------
|
|
|
|
if [ "${dist_path}" != "./" ]; then
|
|
|
|
# At this point, we know the user did not run "./configure". But we
|
|
# have not yet ruled out "<fullpath>/configure" or some equivalent
|
|
# that uses relative paths. To further rule out these possibilities,
|
|
# we create a dummy file in the current build directory.
|
|
touch "./${dummy_file}"
|
|
|
|
# If the dummy file we just created in the current directory does not
|
|
# appear in the source distribution path, then we are in a different
|
|
# directory and thus we must create a symbolic link.
|
|
if [ ! -f "${dist_path}/${dummy_file}" ]; then
|
|
configured_oot="yes"
|
|
#echo "${script_name}: detected out-of-tree build directory."
|
|
else
|
|
configured_oot="no"
|
|
#echo "${script_name}: detected in-tree build directory."
|
|
fi
|
|
|
|
# Remove the dummy file.
|
|
rm -f "./${dummy_file}"
|
|
fi
|
|
|
|
|
|
# -- Instantiate config.mk file from template ------------------------------
|
|
|
|
# Begin substituting information into the config_mk_in file, outputting
|
|
# to config_mk_out.
|
|
echo "${script_name}: creating ${config_mk_out_path} from ${config_mk_in_path}"
|
|
cat "${config_mk_in_path}" \
|
|
| sed -e "s/@version@/${version_esc}/g" \
|
|
| sed -e "s/@so_version_major@/${so_version_major}/g" \
|
|
| sed -e "s/@so_version_minorbuild@/${so_version_minorbuild}/g" \
|
|
| sed -e "s/@config_name@/${config_name}/g" \
|
|
| sed -e "s/@config_list@/${config_list}/g" \
|
|
| sed -e "s/@kernel_list@/${kernel_list}/g" \
|
|
| sed -e "s/@kconfig_map@/${kconfig_map}/g" \
|
|
| sed -e "s/@os_name@/${os_name_esc}/g" \
|
|
| sed -e "s/@is_win@/${is_win}/g" \
|
|
| sed -e "s/@dist_path@/${dist_path_esc}/g" \
|
|
| sed -e "s/@CC_VENDOR@/${cc_vendor}/g" \
|
|
| sed -e "s/@gcc_older_than_4_9_0@/${gcc_older_than_4_9_0}/g" \
|
|
| sed -e "s/@gcc_older_than_6_1_0@/${gcc_older_than_6_1_0}/g" \
|
|
| sed -e "s/@gcc_older_than_9_1_0@/${gcc_older_than_9_1_0}/g" \
|
|
| sed -e "s/@gcc_older_than_11_2_0@/${gcc_older_than_11_2_0}/g" \
|
|
| sed -e "s/@CC@/${cc_esc}/g" \
|
|
| sed -e "s/@CXX@/${cxx_esc}/g" \
|
|
| sed -e "s/@cc_major@/${cc_major}/g" \
|
|
| sed -e "s/@cc_minor@/${cc_minor}/g" \
|
|
| sed -e "s/@cc_revision@/${cc_revision}/g" \
|
|
| sed -e "s/@RANLIB@/${ranlib_esc}/g" \
|
|
| sed -e "s/@AR@/${ar_esc}/g" \
|
|
| sed -e "s/@PYTHON@/${python_esc}/g" \
|
|
| sed -e "s/@libpthread@/${libpthread_esc}/g" \
|
|
| sed -e "s/@cflags_preset@/${cflags_preset_esc}/g" \
|
|
| sed -e "s/@ldflags_preset@/${ldflags_preset_esc}/g" \
|
|
| sed -e "s/@debug_type@/${debug_type}/g" \
|
|
| sed -e "s/@enable_system@/${enable_system}/g" \
|
|
| sed -e "s/@threading_model@/${threading_model}/g" \
|
|
| sed -e "s/@prefix@/${prefix_esc}/g" \
|
|
| sed -e "s/@exec_prefix@/${exec_prefix_esc}/g" \
|
|
| sed -e "s/@libdir@/${libdir_esc}/g" \
|
|
| sed -e "s/@includedir@/${includedir_esc}/g" \
|
|
| sed -e "s/@sharedir@/${sharedir_esc}/g" \
|
|
| sed -e "s/@enable_verbose@/${enable_verbose}/g" \
|
|
| sed -e "s/@configured_oot@/${configured_oot}/g" \
|
|
| sed -e "s/@enable_arg_max_hack@/${enable_arg_max_hack}/g" \
|
|
| sed -e "s/@enable_static@/${enable_static}/g" \
|
|
| sed -e "s/@enable_shared@/${enable_shared}/g" \
|
|
| sed -e "s/@enable_rpath@/${enable_rpath}/g" \
|
|
| sed -e "s/@export_shared@/${export_shared}/g" \
|
|
| sed -e "s/@enable_blas@/${enable_blas}/g" \
|
|
| sed -e "s/@enable_cblas@/${enable_cblas}/g" \
|
|
| sed -e "s/@enable_memkind@/${enable_memkind}/g" \
|
|
| sed -e "s/@pragma_omp_simd@/${pragma_omp_simd}/g" \
|
|
| sed -e "s/@addon_list@/${addon_list}/g" \
|
|
| sed -e "s/@sandbox@/${sandbox}/g" \
|
|
| sed -e "s/@enable_trsm_preinversion@/${enable_trsm_preinversion}/g" \
|
|
| sed -e "s/@enable_aocl_dynamic@/${enable_aocl_dynamic}/g" \
|
|
| sed -e "s/@enable_security_flags@/${enable_security_flags}/g" \
|
|
| sed -e "s/@complex_return@/${complex_return}/g" \
|
|
| sed -e "s/@blas_int_type_size@/${blas_int_type_size}/g" \
|
|
| sed -e "s/\@enable_aocl_zen\@/${enable_aocl_zen}/g" \
|
|
> "${config_mk_out_path}"
|
|
|
|
# -- Instantiate bli_config.h file from template ---------------------------
|
|
|
|
# Begin substituting information into the bli_config_h_in file, outputting
|
|
# to bli_config_h_out. NOTE: We use perl instead of sed because the version
|
|
# of sed used on OS X is old and does not handle the '\n' character
|
|
# intuitively, which was used when constructing ${config_name_define},
|
|
# ${config_list_defines}, and ${kernel_list_defines}.
|
|
echo "${script_name}: creating ${bli_config_h_out_path} from ${bli_config_h_in_path}"
|
|
cat "${bli_config_h_in_path}" \
|
|
| perl -pe "s/\@config_name_define\@/${config_name_define}/g" \
|
|
| perl -pe "s/\@config_list_defines\@/${config_list_defines}/g" \
|
|
| perl -pe "s/\@kernel_list_defines\@/${kernel_list_defines}/g" \
|
|
| sed -e "s/\@enable_aocl_zen\@/${enable_aocl_zen_01}/g" \
|
|
| sed -e "s/@enable_system@/${enable_system_01}/g" \
|
|
| sed -e "s/@enable_openmp@/${enable_openmp_01}/g" \
|
|
| sed -e "s/@enable_pthreads@/${enable_pthreads_01}/g" \
|
|
| sed -e "s/@enable_jrir_slab@/${enable_jrir_slab_01}/g" \
|
|
| sed -e "s/@enable_jrir_rr@/${enable_jrir_rr_01}/g" \
|
|
| sed -e "s/@enable_pba_pools@/${enable_pba_pools_01}/g" \
|
|
| sed -e "s/@enable_sba_pools@/${enable_sba_pools_01}/g" \
|
|
| sed -e "s/@enable_mem_tracing@/${enable_mem_tracing_01}/g" \
|
|
| sed -e "s/@int_type_size@/${int_type_size}/g" \
|
|
| sed -e "s/@blas_int_type_size@/${blas_int_type_size}/g" \
|
|
| sed -e "s/@enable_blas@/${enable_blas_01}/g" \
|
|
| sed -e "s/@enable_cblas@/${enable_cblas_01}/g" \
|
|
| sed -e "s/@enable_mixed_dt@/${enable_mixed_dt_01}/g" \
|
|
| sed -e "s/@enable_mixed_dt_extra_mem@/${enable_mixed_dt_extra_mem_01}/g" \
|
|
| sed -e "s/@enable_mnk1_matrix@/${enable_mnk1_matrix_01}/g" \
|
|
| sed -e "s/@enable_tiny_matrix@/${enable_tiny_matrix_01}/g" \
|
|
| sed -e "s/@enable_small_matrix@/${enable_small_matrix_01}/g" \
|
|
| sed -e "s/@enable_sup_handling@/${enable_sup_handling_01}/g" \
|
|
| sed -e "s/@enable_small_matrix_trsm@/${enable_small_matrix_trsm_01}/g" \
|
|
| sed -e "s/@enable_trsm_preinversion@/${enable_trsm_preinversion_01}/g" \
|
|
| sed -e "s/@enable_memkind@/${enable_memkind_01}/g" \
|
|
| sed -e "s/@enable_aocl_dynamic@/${enable_aocl_dynamic_01}/g" \
|
|
| sed -e "s/@enable_pragma_omp_simd@/${enable_pragma_omp_simd_01}/g" \
|
|
| sed -e "s/@enable_sandbox@/${enable_sandbox_01}/g" \
|
|
| sed -e "s/@enable_shared@/${enable_shared_01}/g" \
|
|
| sed -e "s/@complex_return_intel@/${complex_return_intel01}/g" \
|
|
| sed -e "s/@disable_blis_arch_type@/${disable_blis_arch_type_01}/g" \
|
|
| sed -e "s/@rename_blis_arch_type@/${rename_blis_arch_type}/g" \
|
|
| sed -e "s/@rename_blis_model_type@/${rename_blis_model_type}/g" \
|
|
| sed -e "s/@enable_aocl_dtl_trace@/${enable_aocl_dtl_trace_01}/g" \
|
|
| sed -e "s/@enable_aocl_dtl_log@/${enable_aocl_dtl_log_01}/g" \
|
|
| sed -e "s/@aocl_dtl_trace_level_number@/${aocl_dtl_trace_level_number}/g" \
|
|
> "${bli_config_h_out_path}"
|
|
|
|
# -- Instantiate bli_addon.h file from template ----------------------------
|
|
|
|
# Begin substituting information into the bli_addon_h_in file, outputting
|
|
# to bli_addon_h_out. NOTE: We use perl instead of sed because the version
|
|
# of sed used on OS X is old and does not handle the '\n' character
|
|
# intuitively, which was used when constructing ${addon_list_includes}.
|
|
echo "${script_name}: creating ${bli_addon_h_out_path} from ${bli_addon_h_in_path}"
|
|
cat "${bli_addon_h_in_path}" \
|
|
| perl -pe "s/\@addon_list_includes\@/${addon_list_includes}/g" \
|
|
| sed -e "s/@enable_addons@/${enable_addons_01}/g" \
|
|
> "${bli_addon_h_out_path}"
|
|
|
|
# -- Create top-level object directories -----------------------------------
|
|
|
|
# Create obj sub-directories (if they do not already exist).
|
|
base_obj_dirpath="${obj_dirpath}/${config_name}"
|
|
|
|
echo "${script_name}: creating ${base_obj_dirpath}"
|
|
mkdir -p ${base_obj_dirpath}
|
|
|
|
|
|
obj_config_dirpath="${base_obj_dirpath}/${config_dir}"
|
|
|
|
mkdir -p ${obj_config_dirpath}
|
|
for conf in ${config_list}; do
|
|
echo "${script_name}: creating ${obj_config_dirpath}/${conf}"
|
|
mkdir -p ${obj_config_dirpath}/${conf}
|
|
done
|
|
|
|
|
|
obj_kernels_dirpath="${base_obj_dirpath}/${kernels_dir}"
|
|
|
|
mkdir -p ${obj_kernels_dirpath}
|
|
for kern in ${kernel_list}; do
|
|
echo "${script_name}: creating ${obj_kernels_dirpath}/${kern}"
|
|
mkdir -p ${obj_kernels_dirpath}/${kern}
|
|
done
|
|
|
|
|
|
obj_refkern_dirpath="${base_obj_dirpath}/${refkern_dir}"
|
|
|
|
mkdir -p ${obj_refkern_dirpath}
|
|
for conf in ${config_list}; do
|
|
echo "${script_name}: creating ${obj_refkern_dirpath}/${conf}"
|
|
mkdir -p ${obj_refkern_dirpath}/${conf}
|
|
done
|
|
|
|
|
|
obj_aocldtl_dirpath="${base_obj_dirpath}/${aocldtl_dir}"
|
|
|
|
echo "${script_name}: creating ${obj_aocldtl_dirpath}"
|
|
mkdir -p ${obj_aocldtl_dirpath}
|
|
|
|
|
|
obj_frame_dirpath="${base_obj_dirpath}/${frame_dir}"
|
|
|
|
echo "${script_name}: creating ${obj_frame_dirpath}"
|
|
mkdir -p ${obj_frame_dirpath}
|
|
|
|
|
|
if [ -n "${addon_flag}" ]; then
|
|
|
|
obj_addon_dirpath="${base_obj_dirpath}/${addon_dir}"
|
|
|
|
for addon in ${addon_list}; do
|
|
echo "${script_name}: creating ${obj_addon_dirpath}/${addon}"
|
|
mkdir -p ${obj_addon_dirpath}/${addon}
|
|
done
|
|
fi
|
|
|
|
|
|
if [ -n "${sandbox_flag}" ]; then
|
|
|
|
obj_sandbox_dirpath="${base_obj_dirpath}/${sandbox_dir}"
|
|
|
|
echo "${script_name}: creating ${obj_sandbox_dirpath}/${sandbox}"
|
|
mkdir -p ${obj_sandbox_dirpath}/${sandbox}
|
|
fi
|
|
|
|
|
|
obj_blastest_dirpath="${base_obj_dirpath}/${blastest_dir}"
|
|
|
|
echo "${script_name}: creating ${obj_blastest_dirpath}"
|
|
mkdir -p ${obj_blastest_dirpath}
|
|
|
|
|
|
obj_testsuite_dirpath="${base_obj_dirpath}/${testsuite_dir}"
|
|
|
|
echo "${script_name}: creating ${obj_testsuite_dirpath}"
|
|
mkdir -p ${obj_testsuite_dirpath}
|
|
|
|
|
|
# Create lib directory (if it does not already exist).
|
|
base_lib_dirpath="${lib_dirpath}/${config_name}"
|
|
|
|
echo "${script_name}: creating ${base_lib_dirpath}"
|
|
mkdir -p ${base_lib_dirpath}
|
|
|
|
|
|
# Create include directory (if it does not already exist).
|
|
base_include_dirpath="${include_dirpath}/${config_name}"
|
|
|
|
echo "${script_name}: creating ${base_include_dirpath}"
|
|
mkdir -p ${base_include_dirpath}
|
|
|
|
|
|
# -- Mirror source directory hierarchies to object directories -------------
|
|
|
|
# Combine the config_list with the config_name and then remove duplicates.
|
|
config_list_plus_name=$(rm_duplicate_words "${config_list} ${config_name}")
|
|
|
|
# Mirror each of the sub-configuration directories to the object directory.
|
|
for conf in ${config_list_plus_name}; do
|
|
|
|
echo "${script_name}: mirroring ${config_dirpath}/${conf} to ${obj_config_dirpath}/${conf}"
|
|
${mirror_tree_sh} "${config_dirpath}/${conf}" "${obj_config_dirpath}/${conf}"
|
|
done
|
|
|
|
# Mirror optimized kernels source tree to its object sub-directory.
|
|
# We perform the mirroring on each configuration/kernel sub-directory
|
|
# within 'kernels'.
|
|
for kern in ${kernel_list}; do
|
|
|
|
# Only mirror the optimized kernels source directory if it exists.
|
|
# There are occasions where one of the sub-configurations in the
|
|
# config_list does not correspond to a kernels sub-directory, such
|
|
# as when architecture B is so close to architecture A that B can
|
|
# use A's kernel source code unmodified (though perhaps with
|
|
# different blocksizes).
|
|
#if [ -d "${kernels_dirpath}/${conf}" ]; then
|
|
|
|
echo "${script_name}: mirroring ${kernels_dirpath}/${kern} to ${obj_kernels_dirpath}/${kern}"
|
|
${mirror_tree_sh} "${kernels_dirpath}/${kern}" "${obj_kernels_dirpath}/${kern}"
|
|
#else
|
|
# echo "${script_name}: mirroring ${kernels_dirpath}/${conf} skipped... directory does not exist"
|
|
#fi
|
|
done
|
|
|
|
# Mirror reference kernel source tree to its object sub-directory.
|
|
echo "${script_name}: mirroring ${refkern_dirpath} to ${obj_refkern_dirpath}"
|
|
${mirror_tree_sh} ${refkern_dirpath} ${obj_refkern_dirpath}
|
|
|
|
# Mirror the reference kernels source tree to each sub-configuration's object sub-directory.
|
|
for conf in ${config_list}; do
|
|
|
|
echo "${script_name}: mirroring ${refkern_dirpath} to ${obj_refkern_dirpath}/${conf}"
|
|
${mirror_tree_sh} "${refkern_dirpath}" "${obj_refkern_dirpath}/${conf}"
|
|
done
|
|
|
|
# Mirror framework source tree to its object sub-directory.
|
|
echo "${script_name}: mirroring ${frame_dirpath} to ${obj_frame_dirpath}"
|
|
${mirror_tree_sh} ${frame_dirpath} ${obj_frame_dirpath}
|
|
|
|
# Mirror the AOCL DTL source tree to its object sub-directory.
|
|
echo "${script_name}: mirroring ${aocldtl_dirpath} to ${obj_aocldtl_dirpath}"
|
|
${mirror_tree_sh} ${aocldtl_dirpath} ${obj_aocldtl_dirpath}
|
|
|
|
# Mirror the chosen addon source tree to its object sub-directory.
|
|
if [ -n "${addon_flag}" ]; then
|
|
|
|
for addon in ${addon_list}; do
|
|
|
|
echo "${script_name}: mirroring ${addon_dirpath}/${addon} to ${obj_addon_dirpath}/${addon}"
|
|
${mirror_tree_sh} "${addon_dirpath}/${addon}" "${obj_addon_dirpath}/${addon}"
|
|
done
|
|
fi
|
|
|
|
# Mirror the chosen sandbox source tree to its object sub-directory.
|
|
if [ -n "${sandbox_flag}" ]; then
|
|
|
|
echo "${script_name}: mirroring ${sandbox_dirpath}/${sandbox} to ${obj_sandbox_dirpath}/${sandbox}"
|
|
${mirror_tree_sh} "${sandbox_dirpath}/${sandbox}" "${obj_sandbox_dirpath}/${sandbox}"
|
|
fi
|
|
|
|
|
|
# -- Generate makefile fragments -------------------------------------------
|
|
|
|
clist_contains_cname=$(is_in_list "${config_name}" "${config_list}")
|
|
|
|
# If the config_list does not already contain the config_name (i.e.,
|
|
# if config_name is an umbrella family), generate makefiles in that
|
|
# directory. (In the next step, we will loop over the actual sub-
|
|
# configurations and create fragments there as well.)
|
|
if [ "${clist_contains_cname}" == "false" ]; then
|
|
|
|
echo "${script_name}: creating makefile fragments in ${obj_config_dirpath}/${config_name}"
|
|
${gen_make_frags_sh} \
|
|
-h -r -v0 \
|
|
-o ${script_name} \
|
|
-p 'CONFIG' \
|
|
${config_dirpath}/${config_name} \
|
|
${obj_config_dirpath}/${config_name} \
|
|
${gen_make_frags_dirpath}/fragment.mk \
|
|
${gen_make_frags_dirpath}/suffix_list \
|
|
${gen_make_frags_dirpath}/ignore_list
|
|
fi
|
|
|
|
# Generate makefile fragments for each of the sub-configurations present
|
|
# in the configuration list.
|
|
for conf in ${config_list}; do
|
|
|
|
echo "${script_name}: creating makefile fragments in ${obj_config_dirpath}/${conf}"
|
|
${gen_make_frags_sh} \
|
|
-h -r -v0 \
|
|
-o ${script_name} \
|
|
-p 'CONFIG' \
|
|
${config_dirpath}/${conf} \
|
|
${obj_config_dirpath}/${conf} \
|
|
${gen_make_frags_dirpath}/fragment.mk \
|
|
${gen_make_frags_dirpath}/suffix_list \
|
|
${gen_make_frags_dirpath}/ignore_list
|
|
done
|
|
|
|
# Generate makefile fragments for each of the kernel sets required by
|
|
# the configuration list (in the kernel list).
|
|
for kern in ${kernel_list}; do
|
|
|
|
echo "${script_name}: creating makefile fragments in ${obj_kernels_dirpath}/${kern}"
|
|
${gen_make_frags_sh} \
|
|
-h -r -v0 \
|
|
-o ${script_name} \
|
|
-p 'KERNELS' \
|
|
${kernels_dirpath}/${kern} \
|
|
${obj_kernels_dirpath}/${kern} \
|
|
${gen_make_frags_dirpath}/fragment.mk \
|
|
${gen_make_frags_dirpath}/suffix_list \
|
|
${gen_make_frags_dirpath}/ignore_list
|
|
done
|
|
|
|
# Generate makefile fragments in the reference kernels directory.
|
|
echo "${script_name}: creating makefile fragments in ${obj_refkern_dirpath}"
|
|
${gen_make_frags_sh} \
|
|
-h -r -v0 \
|
|
-o ${script_name} \
|
|
-p 'REFKERN' \
|
|
${refkern_dirpath} \
|
|
${obj_refkern_dirpath} \
|
|
${gen_make_frags_dirpath}/fragment.mk \
|
|
${gen_make_frags_dirpath}/suffix_list \
|
|
${gen_make_frags_dirpath}/ignore_list
|
|
|
|
# Generate makefile fragments in the DTL directory.
|
|
echo "${script_name}: creating makefile fragments in ${obj_aocldtl_dirpath}"
|
|
${gen_make_frags_sh} \
|
|
-h -r -v0 \
|
|
-o ${script_name} \
|
|
-p 'AOCLDTL' \
|
|
${aocldtl_dirpath} \
|
|
${obj_aocldtl_dirpath} \
|
|
${gen_make_frags_dirpath}/fragment.mk \
|
|
${gen_make_frags_dirpath}/suffix_list \
|
|
${gen_make_frags_dirpath}/ignore_list
|
|
|
|
# Generate makefile fragments in the framework directory.
|
|
echo "${script_name}: creating makefile fragments in ${obj_frame_dirpath}"
|
|
${gen_make_frags_sh} \
|
|
-h -r -v0 \
|
|
-o ${script_name} \
|
|
-p 'FRAME' \
|
|
${frame_dirpath} \
|
|
${obj_frame_dirpath} \
|
|
${gen_make_frags_dirpath}/fragment.mk \
|
|
${gen_make_frags_dirpath}/suffix_list \
|
|
${gen_make_frags_dirpath}/ignore_list
|
|
|
|
# Generate makefile fragments in the addon sub-directory.
|
|
if [ -n "${addon_flag}" ]; then
|
|
|
|
for addon in ${addon_list}; do
|
|
|
|
echo "${script_name}: creating makefile fragments in ${obj_addon_dirpath}/${addon}"
|
|
${gen_make_frags_sh} \
|
|
-h -r -v0 \
|
|
-o ${script_name} \
|
|
-p 'ADDON' \
|
|
${addon_dirpath}/${addon} \
|
|
${obj_addon_dirpath}/${addon} \
|
|
${gen_make_frags_dirpath}/fragment.mk \
|
|
${gen_make_frags_dirpath}/suffix_list \
|
|
${gen_make_frags_dirpath}/ignore_list
|
|
done
|
|
fi
|
|
|
|
|
|
# Generate makefile fragments in the sandbox sub-directory.
|
|
if [ -n "${sandbox_flag}" ]; then
|
|
|
|
echo "${script_name}: creating makefile fragments in ${obj_sandbox_dirpath}/${sandbox}"
|
|
${gen_make_frags_sh} \
|
|
-h -r -v0 \
|
|
-o ${script_name} \
|
|
-p 'SANDBOX' \
|
|
${sandbox_dirpath}/${sandbox} \
|
|
${obj_sandbox_dirpath}/${sandbox} \
|
|
${gen_make_frags_dirpath}/fragment.mk \
|
|
${gen_make_frags_dirpath}/suffix_list \
|
|
${gen_make_frags_dirpath}/ignore_list
|
|
fi
|
|
|
|
|
|
# -- Handle out-of-tree builds ---------------------------------------------
|
|
|
|
# Under some circumstances, we need to create some symbolic links to
|
|
# properly handle out-of-tree builds.
|
|
if [ "${configured_oot}" = "yes" ]; then
|
|
|
|
# If 'Makefile' symlink does not already exist in the current
|
|
# directory, create a symbolic link to it. If one does exist, we
|
|
# use -f to force creation of a new link.
|
|
if [ ! -e "./Makefile" ]; then
|
|
|
|
echo "${script_name}: creating symbolic link to Makefile."
|
|
ln -s "${dist_path}/Makefile"
|
|
|
|
elif [ -h "./Makefile" ]; then
|
|
echo "${script_name}: symbolic link to Makefile already exists; forcing creation of new link."
|
|
ln -sf "${dist_path}/Makefile"
|
|
else
|
|
echo "${script_name}: Non-symbolic link file or directory 'Makefile' blocks creation of symlink."
|
|
echo "${script_name}: *** Please remove this entity and re-run configure."
|
|
exit 1
|
|
fi
|
|
|
|
# If 'blis.pc.in' symlink does not already exist in the current
|
|
# directory, create a symbolic link to it. If one does exist, we
|
|
# use -f to force creation of a new link.
|
|
if [ ! -e "./blis.pc.in" ]; then
|
|
|
|
echo "${script_name}: creating symbolic link to blis.pc.in."
|
|
ln -s "${dist_path}/blis.pc.in"
|
|
|
|
elif [ -h "./blis.pc.in" ]; then
|
|
echo "${script_name}: symbolic link to blis.pc.in already exists; forcing creation of new link."
|
|
ln -sf "${dist_path}/blis.pc.in"
|
|
else
|
|
echo "${script_name}: Non-symbolic link file or directory 'blis.pc.in' blocks creation of symlink."
|
|
echo "${script_name}: *** Please remove this entity and re-run configure."
|
|
exit 1
|
|
fi
|
|
|
|
# If 'common.mk' symlink does not already exist in the current
|
|
# directory, create a symbolic link to it. If one does exist, we
|
|
# use -f to force creation of a new link.
|
|
if [ ! -e "./common.mk" ]; then
|
|
|
|
echo "${script_name}: creating symbolic link to common.mk."
|
|
ln -s "${dist_path}/common.mk"
|
|
|
|
elif [ -h "./common.mk" ]; then
|
|
echo "${script_name}: symbolic link to common.mk already exists; forcing creation of new link."
|
|
ln -sf "${dist_path}/common.mk"
|
|
else
|
|
echo "${script_name}: Non-symbolic link file or directory 'common.mk' blocks creation of symlink."
|
|
echo "${script_name}: *** Please remove this entity and re-run configure."
|
|
exit 1
|
|
fi
|
|
|
|
# If 'config' symlink does not already exist in the current
|
|
# directory, create a symbolic link to it. If one does exist, we
|
|
# use -f to force creation of a new link.
|
|
if [ ! -e "./config" ]; then
|
|
|
|
echo "${script_name}: creating symbolic link to 'config' directory."
|
|
ln -s "${dist_path}/config"
|
|
|
|
elif [ -h "./config" ]; then
|
|
echo "${script_name}: symbolic link to 'config' directory already exists; forcing creation of new link."
|
|
ln -sf "${dist_path}/config"
|
|
else
|
|
echo "${script_name}: Non-symbolic link file or directory 'config' blocks creation of symlink."
|
|
echo "${script_name}: *** Please remove this entity and re-run configure."
|
|
exit 1
|
|
fi
|
|
|
|
echo "${script_name}: configured to build outside of source distribution."
|
|
else
|
|
|
|
echo "${script_name}: configured to build within top-level directory of source distribution."
|
|
fi
|
|
|
|
if [ "${warn_user_generic}" = "1" ]; then
|
|
|
|
echo "${script_name}: "
|
|
echo "${script_name}: *** Unable to automatically detect hardware type! ***"
|
|
echo "${script_name}: "
|
|
echo "${script_name}: NOTE: configure was unable to identify a subconfiguration"
|
|
echo "${script_name}: optimized for your hardware. As a result, the 'generic'"
|
|
echo "${script_name}: subconfiguration (with low-performance reference kernels)"
|
|
echo "${script_name}: will be used. For support, please open an issue on GitHub"
|
|
echo "${script_name}: at https://github.com/flame/blis/issues."
|
|
echo "${script_name}: "
|
|
fi
|
|
|
|
# Exit peacefully.
|
|
return 0
|
|
}
|
|
|
|
|
|
# The script's main entry point: invoke main with every command-line
# parameter given to the script. The quoted "$@" forwards each parameter as
# its own separate word, so arguments containing whitespace are not re-split.
|
|
main "$@"
|
|
|