Merge branch 'master' into dev

This commit is contained in:
Field G. Van Zee
2019-03-26 15:51:45 -05:00
4 changed files with 111 additions and 32 deletions

56
configure vendored
View File

@@ -1311,8 +1311,7 @@ get_compiler_version()
# to OS X's egrep only returning the first match.
cc_vendor=$(echo "${vendor_string}" | egrep -o 'icc|gcc|clang|emcc|pnacl|IBM' | { read first rest ; echo $first ; })
if [ "${cc_vendor}" = "icc" -o \
"${cc_vendor}" = "gcc" -o \
"${cc_vendor}" = "clang" ]; then
"${cc_vendor}" = "gcc" ]; then
cc_version=$(${cc} -dumpversion)
else
cc_version=$(echo "${vendor_string}" | egrep -o '[0-9]+\.[0-9]+\.?[0-9]*' | { read first rest ; echo ${first} ; })
@@ -1360,7 +1359,7 @@ check_compiler()
# Specific:
#
# skx: icc 15.0.1+, gcc 6.0+, clang 3.9+
# knl: icc 14.0.1+, gcc 5.0+, clang 3.5+
# knl: icc 14.0.1+, gcc 5.0+, clang 3.9+
# haswell: any
# sandybridge: any
# penryn: any
@@ -1435,27 +1434,42 @@ check_compiler()
# clang
if [ "x${cc_vendor}" = "xclang" ]; then
if [ ${cc_major} -lt 3 ]; then
echoerr_unsupportedcc
fi
if [ ${cc_major} -eq 3 ]; then
if [ ${cc_minor} -lt 3 ]; then
if [ "$(echo ${vendor_string} | grep -o Apple)" = "Apple" ]; then
if [ ${cc_major} -lt 5 ]; then
echoerr_unsupportedcc
fi
if [ ${cc_minor} -lt 5 ]; then
# See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions
if [ ${cc_major} -eq 5 ]; then
# Apple clang 5.0 is clang 3.4svn
blacklistcc_add "excavator"
blacklistcc_add "zen"
blacklistcc_add "knl"
fi
if [ ${cc_minor} -lt 9 ]; then
if [ ${cc_major} -lt 7 ]; then
blacklistcc_add "knl"
blacklistcc_add "skx"
fi
fi
if [ ${cc_major} -lt 4 ]; then
# See comment above regarding zen support.
#blacklistcc_add "zen"
: # explicit no-op since bash can't handle empty loop bodies.
else
if [ ${cc_major} -lt 3 ]; then
echoerr_unsupportedcc
fi
if [ ${cc_major} -eq 3 ]; then
if [ ${cc_minor} -lt 3 ]; then
echoerr_unsupportedcc
fi
if [ ${cc_minor} -lt 5 ]; then
blacklistcc_add "excavator"
blacklistcc_add "zen"
fi
if [ ${cc_minor} -lt 9 ]; then
blacklistcc_add "knl"
blacklistcc_add "skx"
fi
fi
if [ ${cc_major} -lt 4 ]; then
# See comment above regarding zen support.
#blacklistcc_add "zen"
: # explicit no-op since bash can't handle empty loop bodies.
fi
fi
fi
}
@@ -1513,8 +1527,8 @@ check_assembler()
#
# The assembler on OS X won't recognize AVX-512 without help.
if [ "$(uname -s)" == "Darwin" ]; then
cflags="-Wa,-march=knl"
if [ "${cc_vendor}" == "clang" ]; then
cflags="-march=knl"
fi
asm_fp=$(find ${asm_dir} -name "avx512f.s")
@@ -1530,8 +1544,8 @@ check_assembler()
#
# The assembler on OS X won't recognize AVX-512 without help.
if [ "$(uname -s)" == "Darwin" ]; then
cflags="-Wa,-march=skylake-avx512"
if [ "${cc_vendor}" == "clang" ]; then
cflags="-march=skylake-avx512"
fi
asm_fp=$(find ${asm_dir} -name "avx512dq.s")

View File

@@ -9,6 +9,9 @@
* **[Step 3b: Testing (optional)](BuildSystem.md#step-3b-testing-optional)**
* **[Step 4: Installation](BuildSystem.md#step-4-installation)**
* **[Cleaning out build products](BuildSystem.md#cleaning-out-build-products)**
* **[Compiling with BLIS](BuildSystem.md#compiling-with-blis)**
* [Disabling BLAS prototypes](BuildSystem.md#disabling-blas-prototypes)
* [CBLAS](BuildSystem.md#cblas)
* **[Linking against BLIS](BuildSystem.md#linking-against-blis)**
* **[Uninstalling](BuildSystem.md#uninstalling)**
* **[make targets](BuildSystem.md#make-targets)**
@@ -83,7 +86,7 @@ Alternatively, `configure` can automatically select a configuration based on you
```
$ ./configure auto
```
However, as of this writing, only a limited number of architectures are detected. If the `configure` script is not able to detect your architecture, the `generic` configuration will be used.
However, as of this writing, only a limited number of architectures are detected. If the `configure` script is not able to detect your architecture, the `generic` configuration will be used.
Upon running configure, you will get output similar to the following. The exact output will depend on whether you cloned BLIS from a `git` repository or whether you obtained BLIS via a downloadable tarball from the [releases](https://github.com/flame/blis/releases) page.
```
@@ -338,6 +341,47 @@ Removing include.
Running the `distclean` target is like saying, "Remove anything ever created by the build system."
## Compiling with BLIS
All BLIS definitions and prototypes may be included in your C source file by including a single header file, `blis.h`:
```c
#include "stdio.h"
#include "stdlib.h"
#include "otherstuff.h"
#include "blis.h"
```
If the BLAS compatibility layer was enabled at configure-time (as it is by default), then `blis.h` will also provide BLAS prototypes to your source code.
### Disabling BLAS prototypes
Some applications already `#include` a header that contains BLAS prototypes. This can cause problems if those applications also try to `#include` the BLIS header file, as shown above. Suppose for a moment that `otherstuff.h` in the example above already provides BLAS prototypes.
```
$ gcc -I/path/to/blis -I/path/to/otherstuff -c main.c -o main.o
In file included from main.c:41:0:
/path/to/blis/blis.h:36900:111: error: conflicting declaration of C function int xerbla_(const bla_character*, const bla_integer*, ftnlen)
TEF770(xerbla)(const bla_character *srname, const bla_integer *info, ftnlen srname_len);
```
If your application is already declaring (prototyping) BLAS functions, then you may disable the inclusion of those prototypes within `blis.h`. This prevents `blis.h` from re-declaring those prototypes or, alternatively, allows your other header to declare those functions for the first time, depending on the order in which you `#include` the headers.
```c
#include "stdio.h"
#include "stdlib.h"
#include "otherstuff.h"
#define BLIS_DISABLE_BLAS_DEFS // disable BLAS prototypes within BLIS.
#include "blis.h"
```
By `#defining` the `BLIS_DISABLE_BLAS_DEFS` macro, we signal to `blis.h` that it should skip over the BLAS prototypes, but otherwise `#include` everything else as it normally would. Note that `BLIS_DISABLE_BLAS_DEFS` must be `#defined` *prior* to the `#include "blis.h"` directive in order for it to have any effect.
### CBLAS
If you build BLIS with CBLAS enabled and you wish to access CBLAS function prototypes from within your application, you will have to `#include` the `cblas.h` header separately from `blis.h`.
```c
#include "blis.h"
#include "cblas.h"
```
## Linking against BLIS
Once you have instantiated (configured and compiled, and perhaps installed) a BLIS library, you can link to it in your application's makefile as you would any other library. The following is an abbreviated makefile for a small hypothetical application that has just two external dependencies: BLIS and the standard C math library. We also link against libpthread since that library has been a runtime dependency of BLIS since 70640a3 (December 2017).
@@ -357,7 +401,7 @@ OBJS = main.o util.o other.o
%.o: %.c
$(CC) $(CFLAGS) -c $< -o $@
all: $(OBJS)
all: $(OBJS)
$(LINKER) $(OBJS) $(BLIS_LIB) $(OTHER_LIBS) -o my_program.x
```
The above example assumes you will want to include BLIS definitions and function prototypes into your application via `#include "blis.h"`. (If you are only using BLIS via the BLAS compatibility layer, including `blis.h` is not necessary.) Since BLIS headers are installed into a `blis` subdirectory of `PREFIX/include`, you must make sure that the compiler knows where to find the `blis.h` header file. This is typically accomplished by inserting `#include "blis.h"` into your application's source code files and compiling the code with `-I PREFIX/include/blis`.

View File

@@ -35,12 +35,13 @@ complex) and each column representing a different operation (`gemm`,
Each of the 20 graphs within a panel will contain an x-axis that reports
problem size, with all matrix dimensions equal to the problem size (e.g.
_m_ = _n_ = _k_), resulting in square matrices.
The y-axis will report GFLOPS (in the case of single-threaded performance)
or GFLOPS/core (in the case of single- or dual-socket multithreaded
performance), which is simply the total GFLOPS divided by the number of
threads utilized.
This normalization is done intentionally in order to facilitate visual
comparison of multithreaded graphs and single-threaded graphs.
The y-axis will report in units of GFLOPS (billions of floating-point operations
per second) in the case of single-threaded performance, or GFLOPS/core in the
case of single- or dual-socket multithreaded performance, where GFLOPS/core
is simply the total GFLOPS observed divided by the number of threads utilized.
This normalization is done intentionally in order to facilitate a visual
assessment of the drop in efficiency of multithreaded performance relative
to the corresponding single-threaded baseline.
It's also worth pointing out that the top of each graph (e.g. the maximum
y-axis value depicted) _always_ corresponds to the theoretical peak performance

View File

@@ -40,10 +40,31 @@
#endif
#endif // BLIS_ENABLE_CBLAS
// By default, if the BLAS compatibility layer is enabled, we define
// (include) all of the BLAS prototypes. However, if the user is
// #including "blis.h" and also #including another header that also
// declares the BLAS functions, then we provide an opportunity to
// #undefine the BLIS_ENABLE_BLAS_DEFS macro (see below).
#ifdef BLIS_ENABLE_BLAS
#define BLIS_ENABLE_BLAS_DEFS
#else
#undef BLIS_ENABLE_BLAS_DEFS
#endif
// Skip prototyping all of the BLAS if the BLAS test drivers are being
// compiled.
#ifndef BLIS_VIA_BLASTEST
#ifdef BLIS_ENABLE_BLAS
#ifdef BLIS_VIA_BLASTEST
#undef BLIS_ENABLE_BLAS_DEFS
#endif
// Skip prototyping all of the BLAS if the environment has defined the
// macro BLIS_DISABLE_BLAS_DEFS.
#ifdef BLIS_DISABLE_BLAS_DEFS
#undef BLIS_ENABLE_BLAS_DEFS
#endif
// Begin including all BLAS prototypes.
#ifdef BLIS_ENABLE_BLAS_DEFS
// -- System headers needed by BLAS compatibility layer --
@@ -180,4 +201,3 @@
#endif // BLIS_ENABLE_BLAS
#endif // BLIS_VIA_BLASTEST