Determine AMD FP/SIMD execution datapath width

Different Zen processors may have a 512-bit, 256-bit or 128-bit
FP/SIMD execution datapath width (FP512, FP256, FP128). Zen5 allows
a selection of FP512 or FP256 width in BIOS settings. Add cpuid
code to detect the width and store an indication of it in the
file-scope static variable bli_fp_datapath. This should be accessed
internally via the function bli_cpuid_query_fp_datapath(). This
functionality is currently enabled only on x86_64 platforms and
currently reports a value only for AMD CPUs.

Also add Zen3 as a fallback path for any unknown AMD processors if
AVX512 is not supported or has been disabled.

AMD-Internal: [CPUPL-4415]
Change-Id: Idf3fb5a697b43bc035ce110e86f60706dcc67f2a
This commit is contained in:
Edward Smyth
2024-08-28 09:23:37 -04:00
parent 6dcf500703
commit 1f18eeb267
2 changed files with 83 additions and 3 deletions

View File

@@ -94,6 +94,9 @@ static bool is_avx512_supported = FALSE;
static bool is_avx512vnni_supported = FALSE;
static bool is_avx512bf16_supported = FALSE;
// Variable to represent FP/SIMD execution datapath width.
// Initialized to -1, which wraps to the largest uint32_t value in this
// unsigned type and denotes "not yet determined"; it is overwritten by
// bli_cpuid_check_datapath() when a datapath width is detected.
static uint32_t bli_fp_datapath = -1;
// Variables to store the cache sizes (in KB). L3 size is shared by all
// logical processors in the package (i.e. per socket).
static uint32_t bli_l1d_cache_size = -1;
@@ -118,6 +121,9 @@ arch_t bli_cpuid_query_id( void )
bli_cpuid_check_avx512vnni_support( family, model, features );
bli_cpuid_check_avx512bf16_support( family, model, features );
// Check FP/SIMD execution datapath
bli_cpuid_check_datapath( vendor, features );
// Find out cache sizes and set in static variables.
// Currently only enabled for VENDOR_AMD.
bli_cpuid_check_cache( vendor );
@@ -134,6 +140,9 @@ arch_t bli_cpuid_query_id( void )
printf( "AVX512 VNNI = %d\n", is_avx512vnni_supported );
printf( "AVX512 BF16 = %d\n", is_avx512bf16_supported );
const char* datapath_names[] = {"UNSET", "FP128", "INVALID", "FP256", "FP512"};
printf( "FP/SIMD datapath = %d (%s)\n", bli_fp_datapath, datapath_names[bli_fp_datapath+1] );
printf( "Cache Information:\n" );
printf( "L1I size = %u KB\n",bli_l1i_cache_size );
printf( "L1D size = %u KB\n",bli_l1d_cache_size );
@@ -219,6 +228,12 @@ arch_t bli_cpuid_query_id( void )
if ( bli_cpuid_is_zen( family, model, features ) )
return BLIS_ARCH_ZEN;
#endif
#ifdef BLIS_CONFIG_ZEN3
// Fallback test for future AMD processors
// Use zen3 if AVX512 support is not available but AVX2 is.
if ( is_avx2fma3_supported )
return BLIS_ARCH_ZEN3;
#endif
#ifdef BLIS_CONFIG_EXCAVATOR
if ( bli_cpuid_is_excavator( family, model, features ) )
return BLIS_ARCH_EXCAVATOR;
@@ -914,6 +929,12 @@ bool bli_cpuid_is_avx512bf16_supported( void )
return is_avx512bf16_supported;
}
// Return the FP/SIMD execution datapath indicator held in bli_fp_datapath.
// bli_cpuid_query_id_once() is invoked first (presumably so that the CPU
// identification which fills in the value has run - confirm against its
// definition).
uint32_t bli_cpuid_query_fp_datapath( void )
{
	bli_cpuid_query_id_once();

	// Read the stored indicator only after the call above has returned.
	const uint32_t datapath = bli_fp_datapath;

	return datapath;
}
uint32_t bli_cpuid_query_l1d_cache_size( void )
{
bli_cpuid_query_id_once();
@@ -1111,7 +1132,10 @@ enum
(1u<<27), // cpuid[eax=1] :ecx[27:26]
XGETBV_MASK_XMM = 0x02u, // xcr0[1]
XGETBV_MASK_YMM = 0x04u, // xcr0[2]
XGETBV_MASK_ZMM = 0xe0u // xcr0[7:5]
XGETBV_MASK_ZMM = 0xe0u, // xcr0[7:5]
FEATURE_MASK_DATAPATH_FP128 = (1u<<0), // cpuid[eax=0x8000001A] :eax[0]
FEATURE_MASK_DATAPATH_FP256 = (1u<<2), // cpuid[eax=0x8000001A] :eax[2]
FEATURE_MASK_DATAPATH_FP512 = (1u<<3) // cpuid[eax=0x8000001A] :eax[3]
};
@@ -1189,7 +1213,6 @@ uint32_t bli_cpuid_query
if ( bli_cpuid_has_features( eax, FEATURE_MASK_AVXVNNI ) ) *features |= FEATURE_AVXVNNI;
if ( bli_cpuid_has_features( eax, FEATURE_MASK_AVX512BF16 ) ) *features |= FEATURE_AVX512BF16;
}
// Check extended processor info / features bits for AMD-specific features.
@@ -1207,6 +1230,17 @@ uint32_t bli_cpuid_query
if ( bli_cpuid_has_features( ecx, FEATURE_MASK_FMA4 ) ) *features |= FEATURE_FMA4;
}
if ( cpuid_max_ext >= 0x8000001Au )
{
// This is actually a macro that modifies the last four operands,
// hence why they are not passed by address.
// This returns extended feature flags in EAX.
__cpuid( 0x8000001A, eax, ebx, ecx, edx );
if ( bli_cpuid_has_features( eax, FEATURE_MASK_DATAPATH_FP128 ) ) *features |= FEATURE_DATAPATH_FP128;
if ( bli_cpuid_has_features( eax, FEATURE_MASK_DATAPATH_FP256 ) ) *features |= FEATURE_DATAPATH_FP256;
if ( bli_cpuid_has_features( eax, FEATURE_MASK_DATAPATH_FP512 ) ) *features |= FEATURE_DATAPATH_FP512;
}
// Unconditionally check processor info / features bits.
{
@@ -1377,6 +1411,34 @@ uint32_t bli_cpuid_query
return VENDOR_UNKNOWN;
}
// Record the FP/SIMD execution datapath width advertised in the feature
// flags by storing the matching DATAPATH_* value in bli_fp_datapath.
//
// vendor:   vendor identifier; only VENDOR_AMD is acted upon.
// features: feature flag word tested for the FEATURE_DATAPATH_* bits.
//
// For any other vendor, or when none of the FEATURE_DATAPATH_* bits is
// present, bli_fp_datapath is left unchanged.
void bli_cpuid_check_datapath(
       uint32_t vendor,
       uint32_t features )
{
	// No datapath information is reported for non-AMD vendors.
	if ( vendor != VENDOR_AMD ) return;

	// Test from the widest datapath to the narrowest; the first match wins.
	if ( bli_cpuid_has_features( features, FEATURE_DATAPATH_FP512 ) )
	{
		bli_fp_datapath = DATAPATH_FP512;
	}
	else if ( bli_cpuid_has_features( features, FEATURE_DATAPATH_FP256 ) )
	{
		bli_fp_datapath = DATAPATH_FP256;
	}
	else if ( bli_cpuid_has_features( features, FEATURE_DATAPATH_FP128 ) )
	{
		bli_fp_datapath = DATAPATH_FP128;
	}
}
void bli_cpuid_check_cache( uint32_t vendor )
{
if ( vendor == VENDOR_AMD )

View File

@@ -55,6 +55,8 @@ arch_t bli_cpuid_query_id( void );
model_t bli_cpuid_query_model_id( arch_t id );
// Returns the stored FP/SIMD execution datapath width indicator (one of
// the DATAPATH_* values).
uint32_t bli_cpuid_query_fp_datapath( void );
uint32_t bli_cpuid_query_l1d_cache_size( void );
uint32_t bli_cpuid_query_l1i_cache_size( void );
uint32_t bli_cpuid_query_l2_cache_size( void );
@@ -94,6 +96,8 @@ bool bli_cpuid_is_cortexa9( uint32_t model, uint32_t part, uint32_t features );
uint32_t bli_cpuid_query( uint32_t* family, uint32_t* model, uint32_t* features );
// Sets the stored FP/SIMD datapath width indicator from the
// FEATURE_DATAPATH_* bits in features; only acts when vendor is VENDOR_AMD.
void bli_cpuid_check_datapath( uint32_t vendor, uint32_t features );
void bli_cpuid_check_cache( uint32_t vendor );
// -----------------------------------------------------------------------------
@@ -189,7 +193,21 @@ enum
FEATURE_AVXVNNI = 0x20000,
FEATURE_AVX512VP2INTERSECT = 0x40000,
FEATURE_MOVDIRI = 0x80000,
FEATURE_MOVDIR64B = 0x100000
FEATURE_MOVDIR64B = 0x100000,
FEATURE_DATAPATH_FP128 = 0x200000,
FEATURE_DATAPATH_FP256 = 0x400000,
FEATURE_DATAPATH_FP512 = 0x800000
};
// FP/SIMD execution datapath identifiers. From DATAPATH_FP128 onwards each
// value equals the corresponding bit index in the CPUID_Fn8000001A_EAX id
// function; the MOVU entry is kept purely so that the numbering stays
// aligned with those bit positions. DATAPATH_UNSET marks that no width has
// been recorded.
enum
{
	DATAPATH_UNSET = -1,
	DATAPATH_FP128 = 0,
	DATAPATH_MOVU  = 1,
	DATAPATH_FP256 = 2,
	DATAPATH_FP512 = 3
};
#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM)