Merge pull request #23 from xianyi/master

Add automatic CPU detection at the configure stage.
This commit is contained in:
Field G. Van Zee
2015-04-03 08:28:11 -05:00
5 changed files with 507 additions and 2 deletions

View File

@@ -0,0 +1,15 @@
/*
 * Architecture probe (appears to be the arch_detect.c consumed by
 * auto-detect.sh). This file is never compiled: the script runs it through
 * the preprocessor only (`$CC -E`) and greps the output for the marker
 * tokens below, so each marker is intentionally a bare identifier rather
 * than valid C. A marker survives preprocessing only when the compiler
 * predefines one of the macros guarding it.
 */
/* 32-bit x86. */
#if defined(__i386) || defined(_X86)
ARCH_X86
#endif
/* 64-bit x86. */
#if defined(__x86_64__) || defined(__amd64__)
ARCH_X86_64
#endif
/* 32-bit ARM. */
#if defined(__arm__)
ARCH_ARM
#endif
/* 64-bit ARM. */
#if defined(__aarch64__)
ARCH_AARCH64
#endif

105
build/auto-detect/auto-detect.sh Executable file
View File

@@ -0,0 +1,105 @@
#!/bin/bash
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2015, The University of Texas at Austin
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name of The University of Texas at Austin nor the names
# of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
#
# auto-detect.sh
#
# Zhang Xianyi
#
main()
{
    # Detect the host CPU and print the name of the matching configuration
    # on stdout. The caller captures that single line, so every exit path
    # must print exactly one configuration name; "reference" is the
    # fallback whenever detection cannot be completed.

    CC=gcc
    CPUID_SRC=cpuid_x86.c
    CPUID_BIN=blis_cpu_detect
    ARCH=reference

    # The name of the script, stripped of any preceding path.
    script_name=${0##*/}

    # The path to the script. We need this to find the detection sources
    # that live next to it, in the event that the user has chosen to
    # build elsewhere.
    dist_path=${0%/${script_name}}

    # When the script is invoked without a directory component (e.g.
    # "bash auto-detect.sh"), $0 contains no slash, the strip above removes
    # nothing, and dist_path would wrongly be the script name itself.
    if [ "${dist_path}" = "$0" ]; then
        dist_path="."
    fi

    # The path to the directory in which we are building. We do this to
    # make explicit that we distinguish between the directory holding the
    # sources and the directory in which we are building.
    cur_dirpath="."

    OSNAME=`uname`
    if [ "$OSNAME" = "Darwin" ]; then
        CC=clang
    fi

    #
    # Detect architecture by predefined macros: preprocess the probe file
    # and keep the first ARCH_* marker that survives.
    #
    out1=`$CC -E "${dist_path}/arch_detect.c"`
    ARCH=`echo "$out1" | grep -o "ARCH_[a-zA-Z0-9_]*" | head -n1`

    # ARCH is empty when the compiler is missing or no marker matched; the
    # quoted case below sends that to the "reference" fallback instead of
    # tripping a test(1) syntax error.
    case "$ARCH" in
        ARCH_X86_64|ARCH_X86)
            CPUID_SRC=cpuid_x86.c
            ;;
        ARCH_ARM)
            CPUID_SRC=cpuid_arm.c
            ;;
        ARCH_AARCH64)
            # Only support armv8 now
            echo "armv8a"
            return 0
            ;;
        *)
            echo "reference"
            return 0
            ;;
    esac

    #
    # Detect CPU cores: build the small detection program and run it.
    #
    if ! $CC -o "${cur_dirpath}/$CPUID_BIN" "${dist_path}/$CPUID_SRC"; then
        # Could not build the detector; do not leave the caller with an
        # empty configuration name.
        echo "reference"
        return 0
    fi

    detected=`"${cur_dirpath}/$CPUID_BIN"`
    rm -f "${cur_dirpath}/$CPUID_BIN"

    # Guard against a detector that crashed or printed nothing.
    if [ -z "$detected" ]; then
        detected=reference
    fi
    echo "$detected"

    # Exit peacefully.
    return 0
}
# The script's main entry point, passing all parameters given.
main "$@"

View File

@@ -0,0 +1,146 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2015, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <string.h>
/* Identifiers for the ARM configurations this detector can report. */
#define CPUNAME_REFERENCE 0
#define CPUNAME_ARMV7 1
#define CPUNAME_CORTEXA9 2
#define CPUNAME_CORTEXA15 3

/* Configuration name printed for each CPUNAME_* identifier. */
static char *cpuname[] = {
    [CPUNAME_REFERENCE] = "reference",
    [CPUNAME_ARMV7]     = "armv7a",
    [CPUNAME_CORTEXA9]  = "cortex-a9",
    [CPUNAME_CORTEXA15] = "cortex-a15",
};
/*
 * Report whether the first "Features" line of /proc/cpuinfo lists `search`.
 *
 * search: feature name to look for (exact, case-sensitive token match).
 *
 * Returns 1 when the feature is listed, and 0 otherwise -- including when
 * /proc/cpuinfo cannot be opened, has no "Features" line, or that line has
 * no ':' separator.
 */
int get_feature(char *search)
{
    /* The list ends with the newline kept by fgets(); treating it as a
       delimiter lets the last feature on the line match too. */
    static const char delimiters[] = " \t\r\n";
    FILE *infile;
    char buffer[2048];
    char *features = NULL;
    char *colon;
    char *token;

    if (search == NULL) {
        return 0;
    }

    infile = fopen("/proc/cpuinfo", "r");
    if (infile == NULL) {
        return 0;
    }
    while (fgets(buffer, sizeof(buffer), infile)) {
        if (!strncmp("Features", buffer, 8)) {
            /* Only step past the ':' when there actually is one. */
            colon = strchr(buffer, ':');
            if (colon != NULL) {
                features = colon + 1;
            }
            break;
        }
    }
    fclose(infile);

    if (features == NULL) {
        return 0;
    }

    for (token = strtok(features, delimiters);
         token != NULL;
         token = strtok(NULL, delimiters)) {
        if (!strcmp(token, search)) {
            return 1;
        }
    }
    return 0;
}
/*
 * Copy the value of the first /proc/cpuinfo line that starts with `key`
 * (or with `alt_key`, when that is non-NULL) into `value`.
 *
 * The value is everything after the ':' on that line, truncated to
 * `value_size`. Lines that match a key but carry no ':' are ignored.
 *
 * Returns 1 when a value was stored, 0 when no usable line was found, and
 * -1 when /proc/cpuinfo could not be opened.
 */
static int read_cpuinfo_value(const char *key, const char *alt_key,
                              char *value, size_t value_size)
{
    FILE *infile;
    char line[512];
    const char *colon;
    int found = 0;

    infile = fopen("/proc/cpuinfo", "r");
    if (infile == NULL) {
        return -1;
    }
    while (fgets(line, sizeof(line), infile)) {
        if (strncmp(key, line, strlen(key)) != 0 &&
            (alt_key == NULL || strncmp(alt_key, line, strlen(alt_key)) != 0)) {
            continue;
        }
        colon = strchr(line, ':');
        if (colon != NULL) {
            snprintf(value, value_size, "%s", colon + 1);
            found = 1;
            break;
        }
    }
    fclose(infile);
    return found;
}

/*
 * Identify the ARM CPU from /proc/cpuinfo.
 *
 * The "CPU part" field is checked first: part 0xc09 selects the cortex-a9
 * configuration and 0xc0f the cortex-a15 configuration, each only when the
 * "neon" feature is reported (otherwise the generic armv7a configuration
 * is used). Failing that, a "model name"/"Processor" line mentioning
 * "ARMv7" selects armv7a.
 *
 * Returns one of the CPUNAME_* identifiers; CPUNAME_REFERENCE when nothing
 * matches or /proc/cpuinfo cannot be read.
 */
int cpu_detect(void)
{
    char value[512];
    int status;

    status = read_cpuinfo_value("CPU part", NULL, value, sizeof(value));
    if (status < 0) {
        return CPUNAME_REFERENCE;
    }
    if (status > 0) {
        if (strstr(value, "0xc09")) {
            return get_feature("neon") ? CPUNAME_CORTEXA9 : CPUNAME_ARMV7;
        }
        if (strstr(value, "0xc0f")) {
            return get_feature("neon") ? CPUNAME_CORTEXA15 : CPUNAME_ARMV7;
        }
    }

    status = read_cpuinfo_value("model name", "Processor", value, sizeof(value));
    if (status > 0 && strstr(value, "ARMv7")) {
        return CPUNAME_ARMV7;
    }
    return CPUNAME_REFERENCE;
}
int main()
{
int cpuname_id;
cpuname_id=cpu_detect();
printf("%s\n", cpuname[cpuname_id]);
return 0;
}

View File

@@ -0,0 +1,235 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2015, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <string.h>
/* CPU vendors distinguished by get_vendor(). */
#define VENDOR_UNKNOWN 0
#define VENDOR_INTEL 1
#define VENDOR_AMD 2

/* Identifiers for the x86 configurations this detector can report. */
#define CPUNAME_REFERENCE 0
#define CPUNAME_DUNNINGTON 1
#define CPUNAME_SANDYBRIDGE 2
#define CPUNAME_BULLDOZER 3
#define CPUNAME_PILEDRIVER 4

/* Configuration name printed for each CPUNAME_* identifier. */
static char *cpuname[] = {
    [CPUNAME_REFERENCE]   = "reference",
    [CPUNAME_DUNNINGTON]  = "dunnington",
    [CPUNAME_SANDYBRIDGE] = "sandybridge",
    [CPUNAME_BULLDOZER]   = "bulldozer",
    [CPUNAME_PILEDRIVER]  = "piledriver",
};
/* Shift `value` right by `shift` bits, then keep only the bits in `mask`. */
#define BITMASK(value, shift, mask) (((value) >> (shift)) & (mask))
/*
 * Execute the CPUID instruction with `op` in EAX and store the resulting
 * EAX, EBX, ECX and EDX values through the four output pointers.
 *
 * NOTE(review): only EAX is supplied as an input; ECX is left unspecified.
 * That looks fine for the leaves queried in this file (0 and 1), but would
 * matter for leaves that take a sub-leaf in ECX -- confirm before reusing.
 */
static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#if defined(__i386__) && defined(__PIC__)
/* 32-bit PIC build: EBX is saved in EDI before CPUID and swapped back
   afterwards, so EBX is unchanged on exit and CPUID's EBX result is read
   from EDI ("=D"). Presumably this is because EBX is reserved for the GOT
   pointer in i386 PIC code. */
__asm__ __volatile__
("mov %%ebx, %%edi;"
"cpuid;"
"xchgl %%ebx, %%edi;"
: "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
#else
/* Otherwise the four result registers are read directly. */
__asm__ __volatile__
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
#endif
}
/*
 * Query CPUID leaf 0 and return the EAX value it reports.
 * Callers treat a zero result as "CPUID information unavailable".
 */
static inline int have_cpuid(void){
    int leaf0_eax;
    int unused_ebx, unused_ecx, unused_edx;

    cpuid(0, &leaf0_eax, &unused_ebx, &unused_ecx, &unused_edx);
    return leaf0_eax;
}
/*
 * Identify the CPU vendor from CPUID leaf 0.
 *
 * The 12-character vendor string is assembled from EBX, EDX and ECX (in
 * that order) and compared against the Intel and AMD signatures.
 *
 * Returns VENDOR_INTEL, VENDOR_AMD or VENDOR_UNKNOWN.
 */
int get_vendor(void){
    int eax, ebx, ecx, edx;
    char vendor[13];

    cpuid(0, &eax, &ebx, &ecx, &edx);

    /* Copy the register bytes with memcpy: storing an int through a cast
       char pointer breaks the aliasing rules and may be misaligned. */
    memcpy(&vendor[0], &ebx, sizeof(ebx));
    memcpy(&vendor[4], &edx, sizeof(edx));
    memcpy(&vendor[8], &ecx, sizeof(ecx));
    vendor[12] = (char)0;

    if (!strcmp(vendor, "GenuineIntel")) return VENDOR_INTEL;
    if (!strcmp(vendor, "AuthenticAMD")) return VENDOR_AMD;

    /* NOTE(review): unrecognised vendor strings are still classified as
       Intel when leaf 0 returns EAX == 0 or has any of the 0x500 bits set;
       the rationale is not visible here -- confirm before changing. */
    if ((eax == 0) || ((eax & 0x500) != 0)) return VENDOR_INTEL;
    return VENDOR_UNKNOWN;
}
/*
 * Execute XGETBV with `op` in ECX and store the resulting EAX and EDX
 * values through `eax` and `edx`.
 */
static inline void xgetbv(int op, int * eax, int * edx){
// The instruction is emitted as its raw encoding (0x0f 0x01 0xd0) instead
// of the mnemonic -- presumably so that assemblers which do not know
// xgetbv can still build this file.
__asm__ __volatile__
(".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc");
}
/*
 * Report whether AVX can be used on this machine.
 *
 * Two conditions must hold: CPUID leaf 1 must set ECX bits 26, 27 and 28
 * (XSAVE, OSXSAVE and AVX), and XGETBV(0) must show bits 1 and 2 enabled,
 * i.e. the OS saves the SSE and AVX register state.
 *
 * Returns 1 when both hold, 0 otherwise.
 */
int support_avx(){
    const int cpu_bits = (1 << 28) | (1 << 27) | (1 << 26);
    const int os_bits = 6;
    int eax, ebx, ecx, edx;

    cpuid(1, &eax, &ebx, &ecx, &edx);
    if ((ecx & cpu_bits) != cpu_bits) {
        return 0;
    }

    /* Only safe to issue once the CPUID bits above are confirmed. */
    xgetbv(0, &eax, &edx);
    return ((eax & os_bits) == os_bits) ? 1 : 0;
}
/* Return `cpu_id` when AVX is usable, otherwise the reference config
   (the CPU has AVX kernels but the OS does not support AVX). */
static int avx_config_or_reference(int cpu_id)
{
    return support_avx() ? cpu_id : CPUNAME_REFERENCE;
}

/*
 * Identify the x86 CPU from CPUID leaf 1 and map it to a configuration.
 *
 * Returns one of the CPUNAME_* identifiers; CPUNAME_REFERENCE for any
 * vendor/family/model that is not explicitly recognised, and for
 * AVX-based configurations when AVX is not usable.
 */
int cpu_detect()
{
    int eax, ebx, ecx, edx;
    int vendor, base_family, ext_family, base_model, ext_model;

    if (!have_cpuid()) return CPUNAME_REFERENCE;

    vendor = get_vendor();

    cpuid(1, &eax, &ebx, &ecx, &edx);
    ext_family  = BITMASK(eax, 20, 0xff);
    ext_model   = BITMASK(eax, 16, 0x0f);
    base_family = BITMASK(eax, 8, 0x0f);
    base_model  = BITMASK(eax, 4, 0x0f);

    if (vendor == VENDOR_INTEL) {
        if (base_family != 0x6) return CPUNAME_REFERENCE;

        /* Combine the extended and base model nibbles into one number,
           e.g. extended model 2 / model 10 becomes 0x2a. */
        switch ((ext_model << 4) | base_model) {
        case 0x17: /* Penryn uses the dunnington config. */
        case 0x1d:
            return CPUNAME_DUNNINGTON;
        case 0x2a:
        case 0x2d:
        case 0x3a: /* Ivy Bridge */
        case 0x3e:
        case 0x3c: /* Haswell; temporarily uses the sandybridge config. */
        case 0x3f:
        case 0x45:
        case 0x46:
            return avx_config_or_reference(CPUNAME_SANDYBRIDGE);
        default:
            return CPUNAME_REFERENCE;
        }
    }

    if (vendor == VENDOR_AMD) {
        if (base_family != 0xf || ext_family != 6) return CPUNAME_REFERENCE;

        switch (base_model) {
        case 1:
            return avx_config_or_reference(CPUNAME_BULLDOZER);
        case 2:
        case 0: /* Steamroller; temporarily uses the piledriver config. */
            return avx_config_or_reference(CPUNAME_PILEDRIVER);
        default:
            return CPUNAME_REFERENCE;
        }
    }

    return CPUNAME_REFERENCE;
}
int main()
{
int cpuname_id;
cpuname_id=cpu_detect();
printf("%s\n", cpuname[cpuname_id]);
return 0;
}

8
configure vendored
View File

@@ -185,7 +185,7 @@ main()
# line option processing).
if [ $# = "0" ]; then
configs_avail=$(ls ${config_dirpath})
configs_avail="auto "$(ls ${config_dirpath})
echo "${script_name}: "
echo "${script_name}: *** No configuration given! ***"
@@ -216,7 +216,11 @@ main()
elif [ $# = "1" ]; then
config_name=$1
if [ $1 = "auto" ]; then
config_name=`${build_dirpath}/auto-detect/auto-detect.sh`
else
config_name=$1
fi
echo "${script_name}: configuring with '${config_name}' configuration sub-directory."
else