From 5ca632e0f007a2fa978d5f945af3e5223c43685a Mon Sep 17 00:00:00 2001 From: Harihara Sudhan S Date: Tue, 23 Aug 2022 13:33:28 +0530 Subject: [PATCH] Added API to check for BF16 ISA support - Checking for AVX512 bfloat 16 instructions support in architecture using the CPUID AMD-Internal: [CPUPL-2446] Change-Id: I088a8aa46b037af837b2e58a96b59eae70c1dbf0 --- frame/base/bli_cpuid.c | 63 ++++++++++++++++++++++++++++++++++++++++++ frame/base/bli_cpuid.h | 36 ++++++++++++------------ 2 files changed, 81 insertions(+), 18 deletions(-) diff --git a/frame/base/bli_cpuid.c b/frame/base/bli_cpuid.c index 552ab6e7a..4dba53080 100644 --- a/frame/base/bli_cpuid.c +++ b/frame/base/bli_cpuid.c @@ -597,7 +597,43 @@ void bli_cpuid_check_avx512vnni_support( void ) } } +// The support for AVX512_BF16 is checked only once (the first time this API +// is called). On subsequent calls the cached value is returned. +static bool is_avx512bf16_supported = FALSE; + +// Determine if the CPU has support for AVX512_BF16 and cache the result. +void bli_cpuid_check_avx512_bf16_support( void ) +{ + uint32_t family, model, features; + + // Call the CPUID instruction and parse its results into a family id, + // model id, and a feature bit field. + bli_cpuid_query( &family, &model, &features ); + + // Check for expected CPU features. 
+ const uint32_t expected = FEATURE_AVX | + FEATURE_FMA3 | + FEATURE_AVX2 | + FEATURE_AVX512F | + FEATURE_AVX512DQ | + FEATURE_AVX512BW | + FEATURE_AVX512VL | + FEATURE_AVX512VNNI | + FEATURE_AVX512BF16 + ; + + if ( !bli_cpuid_has_features( features, expected ) ) + { + is_avx512bf16_supported = FALSE; + } + else + { + is_avx512bf16_supported = TRUE; + } +} + static bli_pthread_once_t once_check_avx512vnni_support = BLIS_PTHREAD_ONCE_INIT; +static bli_pthread_once_t once_check_avx512_bf16_support = BLIS_PTHREAD_ONCE_INIT; // Ensure that actual support determination happens only once void bli_cpuid_check_avx512vnni_support_once( void ) @@ -607,6 +643,14 @@ void bli_cpuid_check_avx512vnni_support_once( void ) #endif } +// Ensure that the support determination runs only once to avoid a performance hit (no-op if BLIS_CONFIGURETIME_CPUID is defined) +void bli_cpuid_check_avx512_bf16_support_once( void ) +{ +#ifndef BLIS_CONFIGURETIME_CPUID + bli_pthread_once( &once_check_avx512_bf16_support, bli_cpuid_check_avx512_bf16_support ); +#endif +} + // API to check if AVX512_VNNI is supported or not on the current platform. bool bli_cpuid_is_avx512vnni_supported( void ) { @@ -615,6 +659,14 @@ bool bli_cpuid_is_avx512vnni_supported( void ) return is_avx512vnni_supported; } +// API to check if AVX512_BF16 is supported or not on the current platform. 
+bool bli_cpuid_is_avx512_bf16_supported( void ) +{ + bli_cpuid_check_avx512_bf16_support_once(); + + return is_avx512bf16_supported; +} + #elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) arch_t bli_cpuid_query_id( void ) @@ -816,6 +868,7 @@ enum FEATURE_MASK_AVX512BW = (1u<<30), // cpuid[eax=7,ecx=0] :ebx[30] FEATURE_MASK_AVX512VL = (1u<<31), // cpuid[eax=7,ecx=0] :ebx[31] FEATURE_MASK_AVX512VNNI = (1u<<11), // cpuid[eax=7,ecx=0] :ecx[11] + FEATURE_MASK_AVX512BF16 = (1u<< 5), // cpuid[eax=7,ecx=1] :eax[5] FEATURE_MASK_XGETBV = (1u<<26)| (1u<<27), // cpuid[eax=1] :ecx[27:26] XGETBV_MASK_XMM = 0x02u, // xcr0[1] @@ -884,6 +937,16 @@ uint32_t bli_cpuid_query if ( bli_cpuid_has_features( ebx, FEATURE_MASK_AVX512VL ) ) *features |= FEATURE_AVX512VL; if ( bli_cpuid_has_features( ecx, FEATURE_MASK_AVX512VNNI ) ) *features |= FEATURE_AVX512VNNI; + + // This is actually a macro that modifies the last four operands, + // hence why they are not passed by address. + // Leaf 7, sub-leaf 1 returns extended feature flags in EAX. + // The availability of AVX512_BF16 is reported in + // bit 5 of the returned EAX value. + __cpuid_count( 7, 1, eax, ebx, ecx, edx ); + + if ( bli_cpuid_has_features( eax, FEATURE_MASK_AVX512BF16 ) ) *features |= FEATURE_AVX512BF16; + } // Check extended processor info / features bits for AMD-specific features. 
diff --git a/frame/base/bli_cpuid.h b/frame/base/bli_cpuid.h index 439cef3e4..805f31bf2 100644 --- a/frame/base/bli_cpuid.h +++ b/frame/base/bli_cpuid.h @@ -135,6 +135,7 @@ void get_cpu_name( char *cpu_name ); int vpu_count( void ); bool bli_cpuid_is_avx_supported(void); bool bli_cpuid_is_avx512vnni_supported(void); +bool bli_cpuid_is_avx512_bf16_supported(void); enum { @@ -144,26 +145,25 @@ enum }; enum { - FEATURE_SSE3 = 0x0001, - FEATURE_SSSE3 = 0x0002, - FEATURE_SSE41 = 0x0004, - FEATURE_SSE42 = 0x0008, - FEATURE_AVX = 0x0010, - FEATURE_AVX2 = 0x0020, - FEATURE_FMA3 = 0x0040, - FEATURE_FMA4 = 0x0080, - FEATURE_AVX512F = 0x0100, - FEATURE_AVX512DQ = 0x0200, - FEATURE_AVX512PF = 0x0400, - FEATURE_AVX512ER = 0x0800, - FEATURE_AVX512CD = 0x1000, - FEATURE_AVX512BW = 0x2000, - FEATURE_AVX512VL = 0x4000, - FEATURE_AVX512VNNI = 0x8000 + FEATURE_SSE3 = 0x0001, + FEATURE_SSSE3 = 0x0002, + FEATURE_SSE41 = 0x0004, + FEATURE_SSE42 = 0x0008, + FEATURE_AVX = 0x0010, + FEATURE_AVX2 = 0x0020, + FEATURE_FMA3 = 0x0040, + FEATURE_FMA4 = 0x0080, + FEATURE_AVX512F = 0x0100, + FEATURE_AVX512DQ = 0x0200, + FEATURE_AVX512PF = 0x0400, + FEATURE_AVX512ER = 0x0800, + FEATURE_AVX512CD = 0x1000, + FEATURE_AVX512BW = 0x2000, + FEATURE_AVX512VL = 0x4000, + FEATURE_AVX512VNNI = 0x8000, + FEATURE_AVX512BF16 = 0x10000 /* set by bli_cpuid_query from cpuid[eax=7,ecx=1]:eax[5] */ }; - - #elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) char* find_string_in( char* target, char* buffer, size_t buf_len, char* filepath );