mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Added API to check for BF16 ISA support
- Checking for AVX512 bfloat 16 instructions support in architecture using the CPUID AMD-Internal: [CPUPL-2446] Change-Id: I088a8aa46b037af837b2e58a96b59eae70c1dbf0
This commit is contained in:
committed by
Nallani Bhaskar
parent
584069bf74
commit
5ca632e0f0
@@ -597,7 +597,43 @@ void bli_cpuid_check_avx512vnni_support( void )
|
||||
}
|
||||
}
|
||||
|
||||
// Support for AVX512_BF16 is checked only once (the first time this API is
// called). On subsequent calls the cached value is returned.
|
||||
static bool is_avx512bf16_supported = FALSE;
|
||||
|
||||
// Determine if the CPU has support for AVX512_BF16.
|
||||
void bli_cpuid_check_avx512_bf16_support( void )
|
||||
{
|
||||
uint32_t family, model, features;
|
||||
|
||||
// Call the CPUID instruction and parse its results into a family id,
|
||||
// model id, and a feature bit field.
|
||||
bli_cpuid_query( &family, &model, &features );
|
||||
|
||||
// Check for expected CPU features.
|
||||
const uint32_t expected = FEATURE_AVX |
|
||||
FEATURE_FMA3 |
|
||||
FEATURE_AVX2 |
|
||||
FEATURE_AVX512F |
|
||||
FEATURE_AVX512DQ |
|
||||
FEATURE_AVX512BW |
|
||||
FEATURE_AVX512VL |
|
||||
FEATURE_AVX512VNNI |
|
||||
FEATURE_AVX512BF16
|
||||
;
|
||||
|
||||
if ( !bli_cpuid_has_features( features, expected ) )
|
||||
{
|
||||
is_avx512bf16_supported = FALSE;
|
||||
}
|
||||
else
|
||||
{
|
||||
is_avx512bf16_supported = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
static bli_pthread_once_t once_check_avx512vnni_support = BLIS_PTHREAD_ONCE_INIT;
|
||||
static bli_pthread_once_t once_check_avx512_bf16_support = BLIS_PTHREAD_ONCE_INIT;
|
||||
|
||||
// Ensure that actual support determination happens only once
|
||||
void bli_cpuid_check_avx512vnni_support_once( void )
|
||||
@@ -607,6 +643,14 @@ void bli_cpuid_check_avx512vnni_support_once( void )
|
||||
#endif
|
||||
}
|
||||
|
||||
// Ensure that actual support determination happens only once to avoid performance hit
|
||||
void bli_cpuid_check_avx512_bf16_support_once( void )
|
||||
{
|
||||
#ifndef BLIS_CONFIGURETIME_CPUID
|
||||
bli_pthread_once( &once_check_avx512_bf16_support, bli_cpuid_check_avx512_bf16_support );
|
||||
#endif
|
||||
}
|
||||
|
||||
// API to check if AVX512_VNNI is supported or not on the current platform.
|
||||
bool bli_cpuid_is_avx512vnni_supported( void )
|
||||
{
|
||||
@@ -615,6 +659,14 @@ bool bli_cpuid_is_avx512vnni_supported( void )
|
||||
return is_avx512vnni_supported;
|
||||
}
|
||||
|
||||
// API to check if AVX512_bf16 is supported or not on the current platform.
|
||||
bool bli_cpuid_is_avx512_bf16_supported( void )
|
||||
{
|
||||
bli_cpuid_check_avx512_bf16_support_once();
|
||||
|
||||
return is_avx512bf16_supported;
|
||||
}
|
||||
|
||||
#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM)
|
||||
|
||||
arch_t bli_cpuid_query_id( void )
|
||||
@@ -816,6 +868,7 @@ enum
|
||||
FEATURE_MASK_AVX512BW = (1u<<30), // cpuid[eax=7,ecx=0] :ebx[30]
|
||||
FEATURE_MASK_AVX512VL = (1u<<31), // cpuid[eax=7,ecx=0] :ebx[31]
|
||||
FEATURE_MASK_AVX512VNNI = (1u<<11), // cpuid[eax=7,ecx=0] :ecx[11]
|
||||
FEATURE_MASK_AVX512BF16 = (1u<< 5), // cpuid[eax=7,ecx=1] :eax[5]
|
||||
FEATURE_MASK_XGETBV = (1u<<26)|
|
||||
(1u<<27), // cpuid[eax=1] :ecx[27:26]
|
||||
XGETBV_MASK_XMM = 0x02u, // xcr0[1]
|
||||
@@ -884,6 +937,16 @@ uint32_t bli_cpuid_query
|
||||
if ( bli_cpuid_has_features( ebx, FEATURE_MASK_AVX512VL ) ) *features |= FEATURE_AVX512VL;
|
||||
|
||||
if ( bli_cpuid_has_features( ecx, FEATURE_MASK_AVX512VNNI ) ) *features |= FEATURE_AVX512VNNI;
|
||||
|
||||
// This is actually a macro that modifies the last four operands,
|
||||
// hence why they are not passed by address.
|
||||
// This returns extended feature flags in EAX.
|
||||
// AVX512_BF16 availability is reported in bit 5 (zero-based) of the
// returned EAX value.
|
||||
__cpuid_count( 7, 1, eax, ebx, ecx, edx );
|
||||
|
||||
if ( bli_cpuid_has_features( eax, FEATURE_MASK_AVX512BF16 ) ) *features |= FEATURE_AVX512BF16;
|
||||
|
||||
}
|
||||
|
||||
// Check extended processor info / features bits for AMD-specific features.
|
||||
|
||||
@@ -135,6 +135,7 @@ void get_cpu_name( char *cpu_name );
|
||||
int vpu_count( void );
|
||||
bool bli_cpuid_is_avx_supported(void);
|
||||
bool bli_cpuid_is_avx512vnni_supported(void);
|
||||
bool bli_cpuid_is_avx512_bf16_supported(void);
|
||||
|
||||
enum
|
||||
{
|
||||
@@ -144,26 +145,25 @@ enum
|
||||
};
|
||||
enum
|
||||
{
|
||||
FEATURE_SSE3 = 0x0001,
|
||||
FEATURE_SSSE3 = 0x0002,
|
||||
FEATURE_SSE41 = 0x0004,
|
||||
FEATURE_SSE42 = 0x0008,
|
||||
FEATURE_AVX = 0x0010,
|
||||
FEATURE_AVX2 = 0x0020,
|
||||
FEATURE_FMA3 = 0x0040,
|
||||
FEATURE_FMA4 = 0x0080,
|
||||
FEATURE_AVX512F = 0x0100,
|
||||
FEATURE_AVX512DQ = 0x0200,
|
||||
FEATURE_AVX512PF = 0x0400,
|
||||
FEATURE_AVX512ER = 0x0800,
|
||||
FEATURE_AVX512CD = 0x1000,
|
||||
FEATURE_AVX512BW = 0x2000,
|
||||
FEATURE_AVX512VL = 0x4000,
|
||||
FEATURE_AVX512VNNI = 0x8000
|
||||
FEATURE_SSE3 = 0x0001,
|
||||
FEATURE_SSSE3 = 0x0002,
|
||||
FEATURE_SSE41 = 0x0004,
|
||||
FEATURE_SSE42 = 0x0008,
|
||||
FEATURE_AVX = 0x0010,
|
||||
FEATURE_AVX2 = 0x0020,
|
||||
FEATURE_FMA3 = 0x0040,
|
||||
FEATURE_FMA4 = 0x0080,
|
||||
FEATURE_AVX512F = 0x0100,
|
||||
FEATURE_AVX512DQ = 0x0200,
|
||||
FEATURE_AVX512PF = 0x0400,
|
||||
FEATURE_AVX512ER = 0x0800,
|
||||
FEATURE_AVX512CD = 0x1000,
|
||||
FEATURE_AVX512BW = 0x2000,
|
||||
FEATURE_AVX512VL = 0x4000,
|
||||
FEATURE_AVX512VNNI = 0x8000,
|
||||
FEATURE_AVX512BF16 = 0x10000
|
||||
};
|
||||
|
||||
|
||||
|
||||
#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM)
|
||||
|
||||
char* find_string_in( char* target, char* buffer, size_t buf_len, char* filepath );
|
||||
|
||||
Reference in New Issue
Block a user