mirror of
https://github.com/amd/blis.git
synced 2026-06-08 14:58:42 +00:00
Merge branch 'master' into dev
This commit is contained in:
56
configure
vendored
56
configure
vendored
@@ -1311,8 +1311,7 @@ get_compiler_version()
|
||||
# to OS X's egrep only returning the first match.
|
||||
cc_vendor=$(echo "${vendor_string}" | egrep -o 'icc|gcc|clang|emcc|pnacl|IBM' | { read first rest ; echo $first ; })
|
||||
if [ "${cc_vendor}" = "icc" -o \
|
||||
"${cc_vendor}" = "gcc" -o \
|
||||
"${cc_vendor}" = "clang" ]; then
|
||||
"${cc_vendor}" = "gcc" ]; then
|
||||
cc_version=$(${cc} -dumpversion)
|
||||
else
|
||||
cc_version=$(echo "${vendor_string}" | egrep -o '[0-9]+\.[0-9]+\.?[0-9]*' | { read first rest ; echo ${first} ; })
|
||||
@@ -1360,7 +1359,7 @@ check_compiler()
|
||||
# Specific:
|
||||
#
|
||||
# skx: icc 15.0.1+, gcc 6.0+, clang 3.9+
|
||||
# knl: icc 14.0.1+, gcc 5.0+, clang 3.5+
|
||||
# knl: icc 14.0.1+, gcc 5.0+, clang 3.9+
|
||||
# haswell: any
|
||||
# sandybridge: any
|
||||
# penryn: any
|
||||
@@ -1435,27 +1434,42 @@ check_compiler()
|
||||
|
||||
# clang
|
||||
if [ "x${cc_vendor}" = "xclang" ]; then
|
||||
|
||||
if [ ${cc_major} -lt 3 ]; then
|
||||
echoerr_unsupportedcc
|
||||
fi
|
||||
if [ ${cc_major} -eq 3 ]; then
|
||||
if [ ${cc_minor} -lt 3 ]; then
|
||||
if [ "$(echo ${vendor_string} | grep -o Apple)" = "Apple" ]; then
|
||||
if [ ${cc_major} -lt 5 ]; then
|
||||
echoerr_unsupportedcc
|
||||
fi
|
||||
if [ ${cc_minor} -lt 5 ]; then
|
||||
# See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions
|
||||
if [ ${cc_major} -eq 5 ]; then
|
||||
# Apple clang 5.0 is clang 3.4svn
|
||||
blacklistcc_add "excavator"
|
||||
blacklistcc_add "zen"
|
||||
blacklistcc_add "knl"
|
||||
fi
|
||||
if [ ${cc_minor} -lt 9 ]; then
|
||||
if [ ${cc_major} -lt 7 ]; then
|
||||
blacklistcc_add "knl"
|
||||
blacklistcc_add "skx"
|
||||
fi
|
||||
fi
|
||||
if [ ${cc_major} -lt 4 ]; then
|
||||
# See comment above regarding zen support.
|
||||
#blacklistcc_add "zen"
|
||||
: # explicit no-op since bash can't handle empty if bodies.
|
||||
else
|
||||
if [ ${cc_major} -lt 3 ]; then
|
||||
echoerr_unsupportedcc
|
||||
fi
|
||||
if [ ${cc_major} -eq 3 ]; then
|
||||
if [ ${cc_minor} -lt 3 ]; then
|
||||
echoerr_unsupportedcc
|
||||
fi
|
||||
if [ ${cc_minor} -lt 5 ]; then
|
||||
blacklistcc_add "excavator"
|
||||
blacklistcc_add "zen"
|
||||
fi
|
||||
if [ ${cc_minor} -lt 9 ]; then
|
||||
blacklistcc_add "knl"
|
||||
blacklistcc_add "skx"
|
||||
fi
|
||||
fi
|
||||
if [ ${cc_major} -lt 4 ]; then
|
||||
# See comment above regarding zen support.
|
||||
#blacklistcc_add "zen"
|
||||
: # explicit no-op since bash can't handle empty if bodies.
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
}
|
||||
@@ -1513,8 +1527,8 @@ check_assembler()
|
||||
#
|
||||
|
||||
# The assembler on OS X won't recognize AVX-512 without help.
|
||||
if [ "$(uname -s)" == "Darwin" ]; then
|
||||
cflags="-Wa,-march=knl"
|
||||
if [ "${cc_vendor}" == "clang" ]; then
|
||||
cflags="-march=knl"
|
||||
fi
|
||||
|
||||
asm_fp=$(find ${asm_dir} -name "avx512f.s")
|
||||
@@ -1530,8 +1544,8 @@ check_assembler()
|
||||
#
|
||||
|
||||
# The assembler on OS X won't recognize AVX-512 without help.
|
||||
if [ "$(uname -s)" == "Darwin" ]; then
|
||||
cflags="-Wa,-march=skylake-avx512"
|
||||
if [ "${cc_vendor}" == "clang" ]; then
|
||||
cflags="-march=skylake-avx512"
|
||||
fi
|
||||
|
||||
asm_fp=$(find ${asm_dir} -name "avx512dq.s")
|
||||
|
||||
@@ -9,6 +9,9 @@
|
||||
* **[Step 3b: Testing (optional)](BuildSystem.md#step-3b-testing-optional)**
|
||||
* **[Step 4: Installation](BuildSystem.md#step-4-installation)**
|
||||
* **[Cleaning out build products](BuildSystem.md#cleaning-out-build-products)**
|
||||
* **[Compiling with BLIS](BuildSystem.md#compiling-with-blis)**
|
||||
* [Disabling BLAS prototypes](BuildSystem.md#disabling-blas-prototypes)
|
||||
* [CBLAS](BuildSystem.md#cblas)
|
||||
* **[Linking against BLIS](BuildSystem.md#linking-against-blis)**
|
||||
* **[Uninstalling](BuildSystem.md#uninstalling)**
|
||||
* **[make targets](BuildSystem.md#make-targets)**
|
||||
@@ -83,7 +86,7 @@ Alternatively, `configure` can automatically select a configuration based on you
|
||||
```
|
||||
$ ./configure auto
|
||||
```
|
||||
However, as of this writing, only a limited number of architectures are detected. If the `configure` script is not able to detect your architecture, the `generic` configuration will be used.
|
||||
However, as of this writing, only a limited number of architectures are detected. If the `configure` script is not able to detect your architecture, the `generic` configuration will be used.
|
||||
|
||||
Upon running configure, you will get output similar to the following. The exact output will depend on whether you cloned BLIS from a `git` repository or whether you obtained BLIS via a downloadable tarball from the [releases](https://github.com/flame/blis/releases) page.
|
||||
```
|
||||
@@ -338,6 +341,47 @@ Removing include.
|
||||
Running the `distclean` target is like saying, "Remove anything ever created by the build system."
|
||||
|
||||
|
||||
## Compiling with BLIS
|
||||
|
||||
All BLIS definitions and prototypes may be included in your C source file by including a single header file, `blis.h`:
|
||||
```c
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "otherstuff.h"
|
||||
#include "blis.h"
|
||||
```
|
||||
If the BLAS compatibility layer was enabled at configure-time (as it is by default), then `blis.h` will also provide BLAS prototypes to your source code.
|
||||
|
||||
|
||||
### Disabling BLAS prototypes
|
||||
|
||||
Some applications already `#include` a header that contains BLAS prototypes. This can cause problems if those applications also try to `#include` the BLIS header file, as shown above. Suppose for a moment that `otherstuff.h` in the example above already provides BLAS prototypes.
|
||||
```
|
||||
$ gcc -I/path/to/blis -I/path/to/otherstuff -c main.c -o main.o
|
||||
In file included from main.c:41:0:
|
||||
/path/to/blis/blis.h:36900:111: error: conflicting declaration of C function ‘int xerbla_(const bla_character*, const bla_integer*, ftnlen)’
|
||||
TEF770(xerbla)(const bla_character *srname, const bla_integer *info, ftnlen srname_len);
|
||||
```
|
||||
If your application is already declaring (prototyping) BLAS functions, then you may disable the definition of those prototypes within `blis.h`. Doing so either prevents `blis.h` from re-declaring those prototypes or allows your other header to declare those functions for the first time, depending on the order in which you `#include` the headers.
|
||||
```c
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "otherstuff.h"
|
||||
#define BLIS_DISABLE_BLAS_DEFS // disable BLAS prototypes within BLIS.
|
||||
#include "blis.h"
|
||||
```
|
||||
By `#defining` the `BLIS_DISABLE_BLAS_DEFS` macro, we signal to `blis.h` that it should skip over the BLAS prototypes, but otherwise `#include` everything else as it normally would. Note that `BLIS_DISABLE_BLAS_DEFS` must be `#defined` *prior* to the `#include "blis.h"` directive in order for it to have any effect.
|
||||
|
||||
|
||||
### CBLAS
|
||||
|
||||
If you build BLIS with CBLAS enabled and you wish to access CBLAS function prototypes from within your application, you will have to `#include` the `cblas.h` header separately from `blis.h`.
|
||||
```
|
||||
#include "blis.h"
|
||||
#include "cblas.h"
|
||||
```
|
||||
|
||||
|
||||
## Linking against BLIS
|
||||
|
||||
Once you have instantiated (configured and compiled, and perhaps installed) a BLIS library, you can link to it in your application's makefile as you would any other library. The following is an abbreviated makefile for a small hypothetical application that has just two external dependencies: BLIS and the standard C math library. We also link against libpthread since that library has been a runtime dependency of BLIS since 70640a3 (December 2017).
|
||||
@@ -357,7 +401,7 @@ OBJS = main.o util.o other.o
|
||||
%.o: %.c
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
all: $(OBJS)
|
||||
all: $(OBJS)
|
||||
$(LINKER) $(OBJS) $(BLIS_LIB) $(OTHER_LIBS) -o my_program.x
|
||||
```
|
||||
The above example assumes you will want to include BLIS definitions and function prototypes into your application via `#include "blis.h"`. (If you are only using BLIS via the BLAS compatibility layer, including `blis.h` is not necessary.) Since BLIS headers are installed into a `blis` subdirectory of `PREFIX/include`, you must make sure that the compiler knows where to find the `blis.h` header file. This is typically accomplished by inserting `#include "blis.h"` into your application's source code files and compiling the code with `-I PREFIX/include/blis`.
|
||||
|
||||
@@ -35,12 +35,13 @@ complex) and each column representing a different operation (`gemm`,
|
||||
Each of the 20 graphs within a panel will contain an x-axis that reports
|
||||
problem size, with all matrix dimensions equal to the problem size (e.g.
|
||||
_m_ = _n_ = _k_), resulting in square matrices.
|
||||
The y-axis will report GFLOPS (in the case of single-threaded performance)
|
||||
or GFLOPS/core (in the case of single- or dual-socket multithreaded
|
||||
performance), which is simply the total GFLOPS divided by the number of
|
||||
threads utilized.
|
||||
This normalization is done intentionally in order to facilitate visual
|
||||
comparison of multithreaded graphs and single-threaded graphs.
|
||||
The y-axis will report in units of GFLOPS (billions of floating-point operations
|
||||
per second) in the case of single-threaded performance, or GFLOPS/core in the
|
||||
case of single- or dual-socket multithreaded performance, where GFLOPS/core
|
||||
is simply the total GFLOPS observed divided by the number of threads utilized.
|
||||
This normalization is done intentionally in order to facilitate a visual
|
||||
assessment of the drop in efficiency of multithreaded performance relative
|
||||
to their single-threaded baselines.
|
||||
|
||||
It's also worth pointing out that the top of each graph (e.g. the maximum
|
||||
y-axis value depicted) _always_ corresponds to the theoretical peak performance
|
||||
|
||||
@@ -40,10 +40,31 @@
|
||||
#endif
|
||||
#endif // BLIS_ENABLE_CBLAS
|
||||
|
||||
// By default, if the BLAS compatibility layer is enabled, we define
|
||||
// (include) all of the BLAS prototypes. However, if the user is
|
||||
// #including "blis.h" and also #including another header that also
|
||||
// declares the BLAS functions, then we provide an opportunity to
|
||||
// #undefine the BLIS_ENABLE_BLAS_DEFS macro (see below).
|
||||
#ifdef BLIS_ENABLE_BLAS
|
||||
#define BLIS_ENABLE_BLAS_DEFS
|
||||
#else
|
||||
#undef BLIS_ENABLE_BLAS_DEFS
|
||||
#endif
|
||||
|
||||
// Skip prototyping all of the BLAS if the BLAS test drivers are being
|
||||
// compiled.
|
||||
#ifndef BLIS_VIA_BLASTEST
|
||||
#ifdef BLIS_ENABLE_BLAS
|
||||
#ifdef BLIS_VIA_BLASTEST
|
||||
#undef BLIS_ENABLE_BLAS_DEFS
|
||||
#endif
|
||||
|
||||
// Skip prototyping all of the BLAS if the environment has defined the
|
||||
// macro BLIS_DISABLE_BLAS_DEFS.
|
||||
#ifdef BLIS_DISABLE_BLAS_DEFS
|
||||
#undef BLIS_ENABLE_BLAS_DEFS
|
||||
#endif
|
||||
|
||||
// Begin including all BLAS prototypes.
|
||||
#ifdef BLIS_ENABLE_BLAS_DEFS
|
||||
|
||||
|
||||
// -- System headers needed by BLAS compatibility layer --
|
||||
@@ -180,4 +201,3 @@
|
||||
|
||||
|
||||
#endif // BLIS_ENABLE_BLAS
|
||||
#endif // BLIS_VIA_BLASTEST
|
||||
|
||||
Reference in New Issue
Block a user