mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Merge pull request #23 from xianyi/master
Add auto-detecting CPU on configure stage.
This commit is contained in:
15
build/auto-detect/arch_detect.c
Normal file
15
build/auto-detect/arch_detect.c
Normal file
@@ -0,0 +1,15 @@
|
||||
#if defined(__i386) || defined(_X86)
|
||||
ARCH_X86
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) || defined(__amd64__)
|
||||
ARCH_X86_64
|
||||
#endif
|
||||
|
||||
#if defined(__arm__)
|
||||
ARCH_ARM
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__)
|
||||
ARCH_AARCH64
|
||||
#endif
|
||||
105
build/auto-detect/auto-detect.sh
Executable file
105
build/auto-detect/auto-detect.sh
Executable file
@@ -0,0 +1,105 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# BLIS
|
||||
# An object-based framework for developing high-performance BLAS-like
|
||||
# libraries.
|
||||
#
|
||||
# Copyright (C) 2015, The University of Texas at Austin
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# - Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# - Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# - Neither the name of The University of Texas at Austin nor the names
|
||||
# of its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#
|
||||
|
||||
#
|
||||
# auto-detect.sh
|
||||
#
|
||||
# Zhang Xianyi
|
||||
#
|
||||
|
||||
|
||||
main()
{
	# Prints exactly one configuration name on stdout (e.g. "sandybridge",
	# "armv8a", or "reference" when nothing better can be determined).
	# Always returns 0 so that the caller gets a usable name.

	CC=gcc
	CPUID_SRC=cpuid_x86.c
	CPUID_BIN=blis_cpu_detect
	ARCH=reference

	# The name of the script, stripped of any preceding path.
	script_name=${0##*/}

	# The path to the script. We need this to find the top-level directory
	# of the source distribution in the event that the user has chosen to
	# build elsewhere.
	dist_path=${0%/${script_name}}

	# If $0 contained no slash (e.g. "bash auto-detect.sh"), the expansion
	# above leaves it unchanged; the sources are then in the current
	# directory.
	if [ "${dist_path}" = "$0" ]; then
		dist_path="."
	fi

	# The path to the directory in which we are building. We do this to
	# make explicit that we distinguish between the top-level directory
	# of the distribution and the directory in which we are building.
	cur_dirpath="."

	OSNAME=`uname`
	if [ "$OSNAME" = "Darwin" ]; then
		CC=clang
	fi

	#
	# Detect architecture by predefined macros
	#

	out1=`$CC -E "${dist_path}/arch_detect.c"`

	ARCH=`echo "$out1" | grep -o "ARCH_[a-zA-Z0-9_]*" | head -n1`

	# Quoting "$ARCH" keeps the match well-formed even when no marker was
	# found (empty string), in which case we fall through to "reference".
	case "$ARCH" in
		ARCH_X86_64|ARCH_X86)
			CPUID_SRC=cpuid_x86.c
			;;
		ARCH_ARM)
			CPUID_SRC=cpuid_arm.c
			;;
		ARCH_AARCH64)
			# Only support armv8 now
			echo "armv8a"
			return 0
			;;
		*)
			echo "reference"
			return 0
			;;
	esac

	#
	# Detect CPU cores
	#

	# Compiler chatter is sent to stderr so stdout carries only the name.
	if ! $CC -o "${cur_dirpath}/$CPUID_BIN" "${dist_path}/$CPUID_SRC" >&2; then
		echo "reference"
		return 0
	fi

	detected=`"${cur_dirpath}/$CPUID_BIN"`
	rm -rf "${cur_dirpath}/$CPUID_BIN"

	# A probe that crashed or printed nothing must not yield an empty name.
	if [ -z "$detected" ]; then
		detected="reference"
	fi
	echo "$detected"

	# Exit peacefully.
	return 0
}
|
||||
|
||||
|
||||
# The script's main entry point, passing all parameters given.
|
||||
main "$@"
|
||||
146
build/auto-detect/cpuid_arm.c
Normal file
146
build/auto-detect/cpuid_arm.c
Normal file
@@ -0,0 +1,146 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2015, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Identifiers returned by cpu_detect(); each one is also the index of the
   matching entry in cpuname[]. */
#define CPUNAME_REFERENCE 0
#define CPUNAME_ARMV7 1
#define CPUNAME_CORTEXA9 2
#define CPUNAME_CORTEXA15 3

/* Configuration names printed by main(), keyed by the CPUNAME_* ids. */
static char *cpuname[] = {
    [CPUNAME_REFERENCE] = "reference",
    [CPUNAME_ARMV7]     = "armv7a",
    [CPUNAME_CORTEXA9]  = "cortex-a9",
    [CPUNAME_CORTEXA15] = "cortex-a15",
};
|
||||
|
||||
|
||||
/*
 * Report whether /proc/cpuinfo lists a given CPU feature.
 *
 * The first line that starts with "Features" is split on whitespace after
 * its colon, and each token is compared against `search`.
 *
 * search: feature name to look for (e.g. "neon"); NULL never matches.
 *
 * Returns 1 when `search` equals one of the tokens, 0 otherwise. 0 is also
 * returned when the file cannot be opened, has no "Features" line, or that
 * line has no colon.
 */
int get_feature(char *search)
{
    /* Newline and tab are delimiters too, so the last feature on the line
       is not left with a trailing '\n' glued to it. */
    static const char delims[] = " \t\r\n";
    FILE *infile;
    char buffer[2048];
    char *p = NULL;
    char *t;

    if (search == NULL) {
        return 0;
    }

    infile = fopen("/proc/cpuinfo", "r");
    if (infile == NULL) {
        return 0;
    }

    while (fgets(buffer, sizeof(buffer), infile)) {
        if (!strncmp("Features", buffer, 8)) {
            char *colon = strchr(buffer, ':');

            /* Only step past the colon when there is one; a fixed "+ 2"
               could run past the terminator on a short line. */
            if (colon != NULL) {
                p = colon + 1;
            }
            break;
        }
    }
    fclose(infile);

    if (p == NULL) {
        return 0;
    }

    for (t = strtok(p, delims); t != NULL; t = strtok(NULL, delims)) {
        if (!strcmp(t, search)) {
            return 1;
        }
    }

    return 0;
}
|
||||
|
||||
int cpu_detect(void)
|
||||
{
|
||||
FILE *infile;
|
||||
char buffer[512], *p;
|
||||
p = (char *) NULL ;
|
||||
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
if (infile == NULL) {
|
||||
return CPUNAME_REFERENCE;
|
||||
}
|
||||
while (fgets(buffer, sizeof(buffer), infile)) {
|
||||
if (!strncmp("CPU part", buffer, 8)) {
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(infile);
|
||||
|
||||
if(p != NULL) {
|
||||
if (strstr(p, "0xc09")) {
|
||||
if(get_feature("neon"))
|
||||
return CPUNAME_CORTEXA9;
|
||||
else
|
||||
return CPUNAME_ARMV7;
|
||||
}
|
||||
if (strstr(p, "0xc0f")) {
|
||||
if(get_feature("neon"))
|
||||
return CPUNAME_CORTEXA15;
|
||||
else
|
||||
return CPUNAME_ARMV7;
|
||||
}
|
||||
}
|
||||
|
||||
p = (char *) NULL ;
|
||||
infile = fopen("/proc/cpuinfo", "r");
|
||||
if (infile == NULL) {
|
||||
return CPUNAME_REFERENCE;
|
||||
}
|
||||
|
||||
while (fgets(buffer, sizeof(buffer), infile)) {
|
||||
if ((!strncmp("model name", buffer, 10)) || (!strncmp("Processor", buffer, 9))) {
|
||||
p = strchr(buffer, ':') + 2;
|
||||
break;
|
||||
}
|
||||
}
|
||||
fclose(infile);
|
||||
|
||||
if(p != NULL) {
|
||||
if (strstr(p, "ARMv7")) {
|
||||
return CPUNAME_ARMV7;
|
||||
}
|
||||
}
|
||||
|
||||
return CPUNAME_REFERENCE;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int cpuname_id;
|
||||
|
||||
cpuname_id=cpu_detect();
|
||||
printf("%s\n", cpuname[cpuname_id]);
|
||||
return 0;
|
||||
}
|
||||
235
build/auto-detect/cpuid_x86.c
Normal file
235
build/auto-detect/cpuid_x86.c
Normal file
@@ -0,0 +1,235 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2015, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* Vendor ids returned by get_vendor(). */
#define VENDOR_UNKNOWN 0
#define VENDOR_INTEL 1
#define VENDOR_AMD 2

/* Identifiers returned by cpu_detect(); each one is also the index of the
   matching entry in cpuname[]. */
#define CPUNAME_REFERENCE 0
#define CPUNAME_DUNNINGTON 1
#define CPUNAME_SANDYBRIDGE 2
#define CPUNAME_BULLDOZER 3
#define CPUNAME_PILEDRIVER 4

/* Configuration names printed by main(), keyed by the CPUNAME_* ids. */
static char *cpuname[] = {
    [CPUNAME_REFERENCE]   = "reference",
    [CPUNAME_DUNNINGTON]  = "dunnington",
    [CPUNAME_SANDYBRIDGE] = "sandybridge",
    [CPUNAME_BULLDOZER]   = "bulldozer",
    [CPUNAME_PILEDRIVER]  = "piledriver",
};

/* Shift `a` right by `b` bits, then keep the bits selected by mask `c`. */
#define BITMASK(a, b, c) (((a) >> (b)) & (c))
|
||||
|
||||
/*
 * Execute the CPUID instruction with `op` in EAX and store the resulting
 * EAX, EBX, ECX and EDX values through the four output pointers.
 */
static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
    int reg_a, reg_b, reg_c, reg_d;

#if defined(__i386__) && defined(__PIC__)
    /* In 32-bit PIC builds EBX is not named as an operand: it is parked in
       EDI around the instruction and swapped back afterwards, so the EBX
       result comes out in EDI. (Presumably because EBX is reserved for
       position-independent code there -- confirm.) */
    __asm__ __volatile__
        ("mov %%ebx, %%edi;"
         "cpuid;"
         "xchgl %%ebx, %%edi;"
         : "=a" (reg_a), "=D" (reg_b), "=c" (reg_c), "=d" (reg_d)
         : "a" (op)
         : "cc");
#else
    __asm__ __volatile__
        ("cpuid"
         : "=a" (reg_a), "=b" (reg_b), "=c" (reg_c), "=d" (reg_d)
         : "a" (op)
         : "cc");
#endif

    *eax = reg_a;
    *ebx = reg_b;
    *ecx = reg_c;
    *edx = reg_d;
}
|
||||
|
||||
/*
 * Return the EAX value produced by cpuid(0, ...). cpu_detect() treats a
 * zero result as "CPUID not usable".
 */
static inline int have_cpuid(void){
    int regs[4];

    cpuid(0, &regs[0], &regs[1], &regs[2], &regs[3]);

    return regs[0];
}
|
||||
|
||||
|
||||
int get_vendor(void){
|
||||
int eax, ebx, ecx, edx;
|
||||
char vendor[13];
|
||||
|
||||
cpuid(0, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
*(int *)(&vendor[0]) = ebx;
|
||||
*(int *)(&vendor[4]) = edx;
|
||||
*(int *)(&vendor[8]) = ecx;
|
||||
vendor[12] = (char)0;
|
||||
|
||||
if (!strcmp(vendor, "GenuineIntel")) return VENDOR_INTEL;
|
||||
if (!strcmp(vendor, "AuthenticAMD")) return VENDOR_AMD;
|
||||
|
||||
if ((eax == 0) || ((eax & 0x500) != 0)) return VENDOR_INTEL;
|
||||
|
||||
return VENDOR_UNKNOWN;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Execute XGETBV with `op` in ECX and store the resulting EAX and EDX
 * values through the output pointers.
 */
static inline void xgetbv(int op, int * eax, int * edx){
    int lo, hi;

    /* The instruction is emitted as raw opcode bytes rather than by
       mnemonic. */
    __asm__ __volatile__
        (".byte 0x0f, 0x01, 0xd0"
         : "=a" (lo), "=d" (hi)
         : "c" (op)
         : "cc");

    *eax = lo;
    *edx = hi;
}
|
||||
|
||||
/*
 * Report whether AVX can be used.
 *
 * Requires bits 26, 27 and 28 of ECX from cpuid(1, ...) to all be set
 * (presumably XSAVE, OSXSAVE and AVX -- confirm against the vendor
 * manual), and then bits 1 and 2 of EAX from xgetbv(0, ...), which the
 * original code describes as the OS supporting AVX.
 *
 * Returns 1 when both checks pass, 0 otherwise.
 */
int support_avx(){
    const int required = (1 << 28) | (1 << 27) | (1 << 26);
    int eax, ebx, ecx, edx;

    cpuid(1, &eax, &ebx, &ecx, &edx);
    if ((ecx & required) != required) {
        return 0;
    }

    /* xgetbv is only executed once the CPUID bits above are present. */
    xgetbv(0, &eax, &edx);

    return ((eax & 6) == 6) ? 1 : 0;
}
|
||||
|
||||
|
||||
int cpu_detect()
|
||||
{
|
||||
int eax, ebx, ecx, edx;
|
||||
int vendor, family, extend_family, model, extend_model;
|
||||
|
||||
if (!have_cpuid()) return CPUNAME_REFERENCE;
|
||||
|
||||
vendor=get_vendor();
|
||||
|
||||
cpuid(1, &eax, &ebx, &ecx, &edx);
|
||||
extend_family = BITMASK(eax, 20, 0xff);
|
||||
extend_model=BITMASK(eax, 16, 0x0f);
|
||||
family=BITMASK(eax, 8, 0x0f);
|
||||
model=BITMASK(eax, 4, 0x0f);
|
||||
|
||||
if (vendor == VENDOR_INTEL){
|
||||
switch (family) {
|
||||
case 0x6:
|
||||
switch (extend_model) {
|
||||
case 1:
|
||||
switch (model) {
|
||||
case 7:
|
||||
//penryn uses dunnington config.
|
||||
return CPUNAME_DUNNINGTON;
|
||||
case 13:
|
||||
return CPUNAME_DUNNINGTON;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
switch (model) {
|
||||
case 10:
|
||||
case 13:
|
||||
if(support_avx()) {
|
||||
return CPUNAME_SANDYBRIDGE;
|
||||
}else{
|
||||
return CPUNAME_REFERENCE; //OS doesn't support AVX
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
switch (model) {
|
||||
case 10:
|
||||
case 14:
|
||||
//Ivy Bridge
|
||||
if(support_avx()) {
|
||||
return CPUNAME_SANDYBRIDGE;
|
||||
}else{
|
||||
return CPUNAME_REFERENCE; //OS doesn't support AVX
|
||||
}
|
||||
case 12:
|
||||
case 15:
|
||||
//Haswell. Temp use Sandy Brdige
|
||||
if(support_avx()) {
|
||||
return CPUNAME_SANDYBRIDGE;
|
||||
}else{
|
||||
return CPUNAME_REFERENCE; //OS doesn't support AVX
|
||||
}
|
||||
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
switch (model) {
|
||||
case 5:
|
||||
case 6:
|
||||
//Haswell. Temp use Sandy Brdige
|
||||
if(support_avx()) {
|
||||
return CPUNAME_SANDYBRIDGE;
|
||||
}else{
|
||||
return CPUNAME_REFERENCE; //OS doesn't support AVX
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}else if (vendor == VENDOR_AMD){
|
||||
switch (family) {
|
||||
case 0xf:
|
||||
switch (extend_family) {
|
||||
case 6:
|
||||
switch (model) {
|
||||
case 1:
|
||||
if(support_avx())
|
||||
return CPUNAME_BULLDOZER;
|
||||
else
|
||||
return CPUNAME_REFERENCE; //OS don't support AVX.
|
||||
case 2:
|
||||
if(support_avx())
|
||||
return CPUNAME_PILEDRIVER;
|
||||
else
|
||||
return CPUNAME_REFERENCE; //OS don't support AVX.
|
||||
case 0:
|
||||
//Steamroller. Temp use Piledriver.
|
||||
if(support_avx())
|
||||
return CPUNAME_PILEDRIVER;
|
||||
else
|
||||
return CPUNAME_REFERENCE; //OS don't support AVX.
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return CPUNAME_REFERENCE;
|
||||
}
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
int cpuname_id;
|
||||
|
||||
cpuname_id=cpu_detect();
|
||||
|
||||
printf("%s\n", cpuname[cpuname_id]);
|
||||
return 0;
|
||||
}
|
||||
8
configure
vendored
8
configure
vendored
@@ -185,7 +185,7 @@ main()
|
||||
# line option processing).
|
||||
if [ $# = "0" ]; then
|
||||
|
||||
configs_avail=$(ls ${config_dirpath})
|
||||
configs_avail="auto "$(ls ${config_dirpath})
|
||||
|
||||
echo "${script_name}: "
|
||||
echo "${script_name}: *** No configuration given! ***"
|
||||
@@ -216,7 +216,11 @@ main()
|
||||
|
||||
elif [ $# = "1" ]; then
|
||||
|
||||
config_name=$1
|
||||
if [ $1 = "auto" ]; then
|
||||
config_name=`${build_dirpath}/auto-detect/auto-detect.sh`
|
||||
else
|
||||
config_name=$1
|
||||
fi
|
||||
echo "${script_name}: configuring with '${config_name}' configuration sub-directory."
|
||||
else
|
||||
|
||||
|
||||
Reference in New Issue
Block a user