From 2947cfb749c937b0f62fac36cc92f123bd45b53c Mon Sep 17 00:00:00 2001
From: Zhang Xianyi
Date: Wed, 1 Apr 2015 12:24:00 -0500
Subject: [PATCH 1/3] Add auto-detecting CPU on configure stage.

e.g. /Path_to_BLIS/configure auto

Now, it only support detecting x86 CPUs.
---
 build/auto-detect/auto-detect.sh |  75 ++++++++++
 build/auto-detect/cpuid_x86.c    | 235 +++++++++++++++++++++++++++++++
 configure                        |   8 +-
 3 files changed, 316 insertions(+), 2 deletions(-)
 create mode 100755 build/auto-detect/auto-detect.sh
 create mode 100644 build/auto-detect/cpuid_x86.c

diff --git a/build/auto-detect/auto-detect.sh b/build/auto-detect/auto-detect.sh
new file mode 100755
index 000000000..4bf514fea
--- /dev/null
+++ b/build/auto-detect/auto-detect.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+#
+#  BLIS
+#  An object-based framework for developing high-performance BLAS-like
+#  libraries.
+#
+#  Copyright (C) 2015, The University of Texas at Austin
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions are
+#  met:
+#   - Redistributions of source code must retain the above copyright
+#     notice, this list of conditions and the following disclaimer.
+#   - Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution.
+#   - Neither the name of The University of Texas at Austin nor the names
+#     of its contributors may be used to endorse or promote products
+#     derived from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+#
+
+#
+# auto-detect.sh
+#
+# Zhang Xianyi
+#
+
+
+main()
+{
+	CC=gcc
+	CPUID_SRC=cpuid_x86.c
+	CPUID_BIN=blis_cpu_detect
+
+	# The name of the script, stripped of any preceeding path.
+	script_name=${0##*/}
+
+	# The directory containing this script. The CPU detection source file
+	# ($CPUID_SRC) is looked up relative to it, so detection still works when
+	# the user has chosen to build outside the source distribution.
+	dist_path=${0%/${script_name}}
+
+	# The directory in which we are building, kept distinct from dist_path.
+	# The temporary detection program ($CPUID_BIN) is compiled into this
+	# directory, run from it, and then removed again.
+	cur_dirpath="."
+
+
+	OSNAME=`uname`
+	if [ $OSNAME = "Darwin" ]; then
+		CC=clang
+	fi
+	$CC -o ${cur_dirpath}/$CPUID_BIN ${dist_path}/$CPUID_SRC
+	${cur_dirpath}/$CPUID_BIN
+	rm -rf ${cur_dirpath}/$CPUID_BIN
+	# Exit peacefully.
+	return 0
+}
+
+
+# The script's main entry point, passing all parameters given.
+main "$@"
diff --git a/build/auto-detect/cpuid_x86.c b/build/auto-detect/cpuid_x86.c
new file mode 100644
index 000000000..e7757862b
--- /dev/null
+++ b/build/auto-detect/cpuid_x86.c
@@ -0,0 +1,235 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+ + Copyright (C) 2015, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+*/
+
+#include <stdio.h>
+#include <string.h>
+
+#define VENDOR_UNKNOWN 0
+#define VENDOR_INTEL 1
+#define VENDOR_AMD 2
+
+#define CPUNAME_REFERENCE 0
+#define CPUNAME_DUNNINGTON 1
+#define CPUNAME_SANDYBRIDGE 2
+#define CPUNAME_BULLDOZER 3
+#define CPUNAME_PILEDRIVER 4
+/* Configuration names printed by main(), indexed by the CPUNAME_* values. */
+static char *cpuname[] = {
+  "reference",
+  "dunnington",
+  "sandybridge",
+  "bulldozer",
+  "piledriver",
+};
+/* Extract a bit field from a: shift right by b, then mask with c. */
+#define BITMASK(a, b, c) ((((a) >> (b)) & (c)))
+/* Run CPUID leaf op. On i386 PIC builds EBX is preserved by going through EDI. */
+static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
+#if defined(__i386__) && defined(__PIC__)
+  __asm__ __volatile__
+    ("mov %%ebx, %%edi;"
+     "cpuid;"
+     "xchgl %%ebx, %%edi;"
+     : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
+#else
+  __asm__ __volatile__
+    ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
+#endif
+}
+/* Return EAX from CPUID leaf 0; cpu_detect() treats 0 as "CPUID unusable". */
+static inline int have_cpuid(void){
+  int eax, ebx, ecx, edx;
+
+  cpuid(0, &eax, &ebx, &ecx, &edx);
+  return eax;
+}
+
+/* Identify the vendor from CPUID leaf 0; returns one of the VENDOR_* values. */
+int get_vendor(void){
+  int eax, ebx, ecx, edx;
+  char vendor[13];
+
+  cpuid(0, &eax, &ebx, &ecx, &edx);
+  /* The vendor string is laid out as EBX, EDX, ECX; memcpy avoids type-punned stores. */
+  memcpy(&vendor[0], &ebx, sizeof(ebx));
+  memcpy(&vendor[4], &edx, sizeof(edx));
+  memcpy(&vendor[8], &ecx, sizeof(ecx));
+  vendor[12] = (char)0;
+
+  if (!strcmp(vendor, "GenuineIntel")) return VENDOR_INTEL;
+  if (!strcmp(vendor, "AuthenticAMD")) return VENDOR_AMD;
+
+  if ((eax == 0) || ((eax & 0x500) != 0)) return VENDOR_INTEL;
+
+  return VENDOR_UNKNOWN;
+}
+
+/* Execute XGETBV with ECX = op and store the EAX/EDX results in *eax and *edx. */
+static inline void xgetbv(int op, int * eax, int * edx){
+  //Use binary code for xgetbv
+  __asm__ __volatile__
+    (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
+}
+/* Return 1 when AVX is usable: CPUID.1:ECX bits 26-28 set and XGETBV(0) bits 1-2 set. */
+int support_avx(){
+  int eax, ebx, ecx, edx;
+  int ret=0;
+
+  cpuid(1, &eax, &ebx, &ecx, &edx);
+  if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){
+    xgetbv(0, &eax, &edx);
+    if((eax & 6) == 6){
+      ret=1;  //OS support AVX
+    }
+  }
+  return ret;
+}
+
+/* Map vendor/family/model from CPUID to a CPUNAME_* index (CPUNAME_REFERENCE if unknown). */
+int cpu_detect()
+{
+  int eax, ebx, ecx, edx;
+  int vendor, family, extend_family, model, extend_model;
+
+  if (!have_cpuid()) return CPUNAME_REFERENCE;
+
+  vendor=get_vendor();
+
+  cpuid(1, &eax, &ebx, &ecx, &edx);
+  extend_family = BITMASK(eax, 20, 0xff);
+  extend_model=BITMASK(eax, 16, 0x0f);
+  family=BITMASK(eax, 8, 0x0f);
+  model=BITMASK(eax, 4, 0x0f);
+  /* The tables below are keyed on family, then extended model/family, then model. */
+  if (vendor == VENDOR_INTEL){
+    switch (family) {
+    case 0x6:
+      switch (extend_model) {
+      case 1:
+        switch (model) {
+        case 7:
+          //penryn uses dunnington config.
+          return CPUNAME_DUNNINGTON;
+        case 13:
+          return CPUNAME_DUNNINGTON;
+        }
+        break;
+      case 2:
+        switch (model) {
+        case 10:
+        case 13:
+          if(support_avx()) {
+            return CPUNAME_SANDYBRIDGE;
+          }else{
+            return CPUNAME_REFERENCE; //OS doesn't support AVX
+          }
+        }
+        break;
+      case 3:
+        switch (model) {
+        case 10:
+        case 14:
+          //Ivy Bridge
+          if(support_avx()) {
+            return CPUNAME_SANDYBRIDGE;
+          }else{
+            return CPUNAME_REFERENCE; //OS doesn't support AVX
+          }
+        case 12:
+        case 15:
+          //Haswell. Temp use Sandy Bridge
+          if(support_avx()) {
+            return CPUNAME_SANDYBRIDGE;
+          }else{
+            return CPUNAME_REFERENCE; //OS doesn't support AVX
+          }
+
+        }
+        break;
+      case 4:
+        switch (model) {
+        case 5:
+        case 6:
+          //Haswell. Temp use Sandy Bridge
+          if(support_avx()) {
+            return CPUNAME_SANDYBRIDGE;
+          }else{
+            return CPUNAME_REFERENCE; //OS doesn't support AVX
+          }
+        }
+        break;
+      }
+      break;
+    }
+  }else if (vendor == VENDOR_AMD){
+    switch (family) {
+    case 0xf:
+      switch (extend_family) {
+      case 6:
+        switch (model) {
+        case 1:
+          if(support_avx())
+            return CPUNAME_BULLDOZER;
+          else
+            return CPUNAME_REFERENCE; //OS doesn't support AVX.
+        case 2:
+          if(support_avx())
+            return CPUNAME_PILEDRIVER;
+          else
+            return CPUNAME_REFERENCE; //OS doesn't support AVX.
+        case 0:
+          //Steamroller. Temp use Piledriver.
+          if(support_avx())
+            return CPUNAME_PILEDRIVER;
+          else
+            return CPUNAME_REFERENCE; //OS doesn't support AVX.
+        }
+      }
+      break;
+    }
+  }
+
+  return CPUNAME_REFERENCE;
+}
+
+/* Print the name of the detected configuration on stdout. */
+int main()
+{
+  int cpuname_id;
+
+  cpuname_id=cpu_detect();
+
+  printf("%s\n", cpuname[cpuname_id]);
+  return 0;
+}
diff --git a/configure b/configure
index acdb2ef78..3c8dbbd27 100755
--- a/configure
+++ b/configure
@@ -185,7 +185,7 @@ main()
 	# line option processing).
 	if [ $# = "0" ]; then
 
-		configs_avail=$(ls ${config_dirpath})
+		configs_avail="auto "$(ls ${config_dirpath})
 
 		echo "${script_name}: "
 		echo "${script_name}: *** No configuration given! ***"
@@ -216,7 +216,11 @@ main()
 
 	elif [ $# = "1" ]; then
 
-		config_name=$1
+		if [ "$1" = "auto" ]; then
+			config_name=`${build_dirpath}/auto-detect/auto-detect.sh`
+		else
+			config_name=$1
+		fi
 		echo "${script_name}: configuring with '${config_name}' configuration sub-directory."
 
 	else

From aa6eec4f43137057276fe6119bdbfb5c52682527 Mon Sep 17 00:00:00 2001
From: Zhang Xianyi
Date: Thu, 2 Apr 2015 16:03:44 -0500
Subject: [PATCH 2/3] Detect the CPU architecture. Support ARM cores.

Detect the CPU architecture by compiler's predefined macros. Then,
detect the CPU cores.

Support detecting x86 and ARM architectures.
---
 build/auto-detect/arch_detect.c  |  15 +++++
 build/auto-detect/auto-detect.sh |  30 +++++++++
 build/auto-detect/cpuid_arm.c    | 107 +++++++++++++++++++++++++++++++
 3 files changed, 152 insertions(+)
 create mode 100644 build/auto-detect/arch_detect.c
 create mode 100644 build/auto-detect/cpuid_arm.c

diff --git a/build/auto-detect/arch_detect.c b/build/auto-detect/arch_detect.c
new file mode 100644
index 000000000..a43fbf5af
--- /dev/null
+++ b/build/auto-detect/arch_detect.c
@@ -0,0 +1,15 @@
+#if defined(__i386) || defined(_X86)
+ARCH_X86
+#endif
+
+#if defined(__x86_64__) || defined(__amd64__)
+ARCH_X86_64
+#endif
+
+#if defined(__arm__)
+ARCH_ARM
+#endif
+
+#if defined(__aarch64__)
+ARCH_AARCH64
+#endif
diff --git a/build/auto-detect/auto-detect.sh b/build/auto-detect/auto-detect.sh
index 4bf514fea..9300e3b8b 100755
--- a/build/auto-detect/auto-detect.sh
+++ b/build/auto-detect/auto-detect.sh
@@ -44,6 +44,7 @@ main()
 	CC=gcc
 	CPUID_SRC=cpuid_x86.c
 	CPUID_BIN=blis_cpu_detect
+	ARCH=reference
 
 	# The name of the script, stripped of any preceeding path.
 	script_name=${0##*/}
@@ -63,9 +64,38 @@ main()
 	if [ $OSNAME = "Darwin" ]; then
 		CC=clang
 	fi
+
+	# Detect the architecture from the compiler's predefined macros:
+	# arch_detect.c is only preprocessed (-E), and -P drops the line markers
+	# so that the source path can never be mistaken for an ARCH_* token.
+
+	out1=`$CC -E -P ${dist_path}/arch_detect.c`
+
+	ARCH=`echo $out1 | grep -o "ARCH_[a-zA-Z0-9_]*" | head -n1`
+
+	if [ "$ARCH" = "ARCH_X86_64" ]; then
+		CPUID_SRC=cpuid_x86.c
+	elif [ "$ARCH" = "ARCH_X86" ]; then
+		CPUID_SRC=cpuid_x86.c
+	elif [ "$ARCH" = "ARCH_ARM" ]; then
+		CPUID_SRC=cpuid_arm.c
+	elif [ "$ARCH" = "ARCH_AARCH64" ]; then
+		# AArch64: only the armv8a configuration is supported for now.
+		echo "armv8a"
+		return 0
+	else
+		echo "reference"
+		return 0
+	fi
+
+	# Detect the CPU core: build and run the probe selected above, which
+	# prints the name of the configuration to use on stdout, then remove
+	# the temporary binary.
+
 	$CC -o ${cur_dirpath}/$CPUID_BIN ${dist_path}/$CPUID_SRC
 	${cur_dirpath}/$CPUID_BIN
 	rm -rf ${cur_dirpath}/$CPUID_BIN
+
 	# Exit peacefully.
 	return 0
 }
diff --git a/build/auto-detect/cpuid_arm.c b/build/auto-detect/cpuid_arm.c
new file mode 100644
index 000000000..b846931ea
--- /dev/null
+++ b/build/auto-detect/cpuid_arm.c
@@ -0,0 +1,107 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2015, The University of Texas at Austin
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name of The University of Texas at Austin nor the names
+      of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include <stdio.h>
+#include <string.h>
+
+#define CPUNAME_REFERENCE 0
+#define CPUNAME_ARMV7 1
+#define CPUNAME_CORTEXA9 2
+#define CPUNAME_CORTEXA15 3
+/* Configuration names printed by main(), indexed by the CPUNAME_* values. */
+static char *cpuname[] = {
+  "reference",
+  "armv7a",
+  "cortex-a9",
+  "cortex-a15",
+};
+
+int cpu_detect(void)
+{
+  FILE *infile;
+  char buffer[512], *p;
+  p = (char *) NULL ;
+  /* Pass 1: map the "CPU part" ID in /proc/cpuinfo to a known core. */
+  infile = fopen("/proc/cpuinfo", "r");
+  if (infile == NULL) {
+    return CPUNAME_REFERENCE;
+  }
+  while (fgets(buffer, sizeof(buffer), infile)) {
+    if (!strncmp("CPU part", buffer, 8)) {
+      p = strchr(buffer, ':'); /* stays NULL when the line has no ':' */
+      break;
+    }
+  }
+  fclose(infile);
+
+  if(p != NULL) {
+    if (strstr(p, "0xc09")) {
+      return CPUNAME_CORTEXA9;
+    }
+    if (strstr(p, "0xc0f")) {
+      return CPUNAME_CORTEXA15;
+    }
+  }
+
+  p = (char *) NULL ;
+  infile = fopen("/proc/cpuinfo", "r");
+  if (infile == NULL) {
+    return CPUNAME_REFERENCE;
+  }
+  /* Pass 2: fall back to generic ARMv7 based on the model name / Processor line. */
+  while (fgets(buffer, sizeof(buffer), infile)) {
+    if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9))) {
+      p = strchr(buffer, ':'); /* stays NULL when the line has no ':' */
+      break;
+    }
+  }
+  fclose(infile);
+
+  if(p != NULL) {
+    if (strstr(p, "ARMv7")) {
+      return CPUNAME_ARMV7;
+    }
+  }
+
+  return CPUNAME_REFERENCE;
+}
+/* Print the name of the detected configuration on stdout. */
+int main()
+{
+  int cpuname_id;
+
+  cpuname_id=cpu_detect();
+  printf("%s\n", cpuname[cpuname_id]);
+  return 0;
+}

From 4bfd1ce8ca93f93d170dd2715f0a32027b417b46 Mon Sep 17 00:00:00 2001
From: Zhang Xianyi
Date: Thu, 2 Apr 2015 16:40:21 -0500
Subject: [PATCH 3/3] Detect NEON for cortex-a9 and cortex-a15.

---
 build/auto-detect/cpuid_arm.c | 46 ++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 44 insertions(+), 2 deletions(-)

diff --git a/build/auto-detect/cpuid_arm.c b/build/auto-detect/cpuid_arm.c
index b846931ea..54959cac1 100644
--- a/build/auto-detect/cpuid_arm.c
+++ b/build/auto-detect/cpuid_arm.c
@@ -47,6 +47,42 @@ static char *cpuname[] = {
   "cortex-a15",
 };
 
+
+/*
+ * Return 1 if the "Features" line of /proc/cpuinfo lists the feature named
+ * by search (e.g. "neon") as a whole word, and 0 otherwise, including when
+ * the file cannot be opened or has no "Features" line.
+ */
+int get_feature(char *search)
+{
+  FILE *infile;
+  char buffer[2048], *p, *t;
+  p = (char *) NULL;
+
+  infile = fopen("/proc/cpuinfo", "r");
+  if (infile == NULL) {
+    return 0;
+  }
+
+  while (fgets(buffer, sizeof(buffer), infile)) {
+    if (!strncmp("Features", buffer, 8)) {
+      p = strchr(buffer, ':');
+      break;
+    }
+  }
+  fclose(infile);
+
+  if (p == NULL) return 0;
+
+  /* Start after the ':'. fgets() keeps the trailing newline, so it must be a
+     delimiter too or the last feature on the line would never match. */
+  for (t = strtok(p + 1, " \t\n"); t != NULL; t = strtok(NULL, " \t\n")) {
+    if (!strcmp(t, search)) { return 1; }
+  }
+
+  return 0;
+}
+
 int cpu_detect(void)
 {
   FILE *infile;
@@ -67,10 +103,16 @@ int cpu_detect(void)
 
   if(p != NULL) {
     if (strstr(p, "0xc09")) {
-      return CPUNAME_CORTEXA9;
+      if(get_feature("neon"))
+        return CPUNAME_CORTEXA9;
+      else
+        return CPUNAME_ARMV7;
     }
     if (strstr(p, "0xc0f")) {
-      return CPUNAME_CORTEXA15;
+      if(get_feature("neon"))
+        return CPUNAME_CORTEXA15;
+      else
+        return CPUNAME_ARMV7;
     }
   }
 