mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
BLIS: zen5 cpuid and arch changes
Implement initial support for Zen5 systems: - Detect the new Zen5 AVXVNNI, AVX512VP2INTERSECT, MOVDIRI and MOVDIR64B instructions. - Assume for now that Zen5 will use the Zen4 code path. BLIS_ARCH_TYPE=zen5 will therefore function as an alias for BLIS_ARCH_TYPE=zen4, but the different hardware models will still be detected. AMD-Internal: [CPUPL-3518] Change-Id: I00fb413d743f152a5412ace3e740df1fd39a1600
This commit is contained in:
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2022 - 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -49,8 +49,7 @@
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 6144, 4002, 4080, 2004 ); \
|
||||
\
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 5, 5, -1, -1 ); \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); \
|
||||
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 );
|
||||
|
||||
#define BLI_CNTX_DEFAULT_BLKSZ_LIST_BERGAMO(blkszs) \
|
||||
/* s d c z */ \
|
||||
@@ -62,8 +61,33 @@
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 6144, 3600, 4080, 2004 ); \
|
||||
\
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 5, 5, -1, -1 ); \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 );
|
||||
|
||||
/* Starting point for Turin, copied from Genoa.
 *
 * Fills the entries of the block-size array `blkszs` indexed by BLIS_MR,
 * BLIS_NR, BLIS_MC, BLIS_KC, BLIS_NC, BLIS_AF and BLIS_DF. Each call takes
 * one value per column of the "s d c z" header below.
 *
 * The body is wrapped in do { ... } while ( 0 ) so that the expansion is a
 * single statement and can be used safely as the body of an unbraced
 * if/else. Invoke it with a trailing semicolon:
 *
 *     BLI_CNTX_DEFAULT_BLKSZ_LIST_TURIN(blkszs);
 *
 * NOTE(review): the second row of values given to bli_blksz_init() for
 * BLIS_KC is presumably the set of maximum block sizes -- confirm against
 * the bli_blksz_init() prototype.
 */
#define BLI_CNTX_DEFAULT_BLKSZ_LIST_TURIN(blkszs) \
	do \
	{ \
		/*                                             s     d     c     z */ \
		bli_blksz_init_easy( &(blkszs)[ BLIS_MR ],    32,   32,    3,   12 ); \
		bli_blksz_init_easy( &(blkszs)[ BLIS_NR ],    12,    6,    8,    4 ); \
		bli_blksz_init_easy( &(blkszs)[ BLIS_MC ],   512,  128,  144,   60 ); \
		bli_blksz_init     ( &(blkszs)[ BLIS_KC ],   480,  512,  256,  512, \
		                                             480,  320,  256,  160 ); \
		bli_blksz_init_easy( &(blkszs)[ BLIS_NC ],  6144, 4002, 4080, 2004 ); \
		\
		bli_blksz_init_easy( &(blkszs)[ BLIS_AF ],     5,    5,   -1,   -1 ); \
		bli_blksz_init_easy( &(blkszs)[ BLIS_DF ],     8,    8,   -1,   -1 ); \
	} while ( 0 )
|
||||
|
||||
/* Starting point for Turin Dense, copied from Bergamo.
 *
 * Fills the entries of the block-size array `blkszs` indexed by BLIS_MR,
 * BLIS_NR, BLIS_MC, BLIS_KC, BLIS_NC, BLIS_AF and BLIS_DF. Each call takes
 * one value per column of the "s d c z" header below.
 *
 * The body is wrapped in do { ... } while ( 0 ) so that the expansion is a
 * single statement and can be used safely as the body of an unbraced
 * if/else. Invoke it with a trailing semicolon:
 *
 *     BLI_CNTX_DEFAULT_BLKSZ_LIST_TURIN_DENSE(blkszs);
 *
 * NOTE(review): the second row of values given to bli_blksz_init() for
 * BLIS_KC is presumably the set of maximum block sizes -- confirm against
 * the bli_blksz_init() prototype.
 */
#define BLI_CNTX_DEFAULT_BLKSZ_LIST_TURIN_DENSE(blkszs) \
	do \
	{ \
		/*                                             s     d     c     z */ \
		bli_blksz_init_easy( &(blkszs)[ BLIS_MR ],    32,   32,    3,   12 ); \
		bli_blksz_init_easy( &(blkszs)[ BLIS_NR ],    12,    6,    8,    4 ); \
		bli_blksz_init_easy( &(blkszs)[ BLIS_MC ],   512,   64,  144,   60 ); \
		bli_blksz_init     ( &(blkszs)[ BLIS_KC ],   480,  512,  256,  512, \
		                                             480,  320,  256,  160 ); \
		bli_blksz_init_easy( &(blkszs)[ BLIS_NC ],  6144, 3600, 4080, 2004 ); \
		\
		bli_blksz_init_easy( &(blkszs)[ BLIS_AF ],     5,    5,   -1,   -1 ); \
		bli_blksz_init_easy( &(blkszs)[ BLIS_DF ],     8,    8,   -1,   -1 ); \
	} while ( 0 )
|
||||
|
||||
void bli_cntx_init_zen4( cntx_t* cntx )
|
||||
{
|
||||
@@ -212,7 +236,15 @@ void bli_cntx_init_zen4( cntx_t* cntx )
|
||||
// These are reference block sizes and may be overridden based on
|
||||
// number of threads used at runtime.
|
||||
|
||||
if ( bli_init_model_query_id() == BLIS_MODEL_BERGAMO )
|
||||
if ( bli_init_model_query_id() == BLIS_MODEL_TURIN_DENSE )
|
||||
{
|
||||
BLI_CNTX_DEFAULT_BLKSZ_LIST_TURIN_DENSE(blkszs);
|
||||
}
|
||||
else if ( bli_init_model_query_id() == BLIS_MODEL_TURIN )
|
||||
{
|
||||
BLI_CNTX_DEFAULT_BLKSZ_LIST_TURIN(blkszs);
|
||||
}
|
||||
else if ( bli_init_model_query_id() == BLIS_MODEL_BERGAMO )
|
||||
{
|
||||
BLI_CNTX_DEFAULT_BLKSZ_LIST_BERGAMO(blkszs);
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -569,6 +569,7 @@ static char* config_name[ BLIS_NUM_ARCHS ] =
|
||||
"sandybridge",
|
||||
"penryn",
|
||||
|
||||
"zen5",
|
||||
"zen4",
|
||||
"zen3",
|
||||
"zen2",
|
||||
@@ -609,6 +610,9 @@ static char* model_name[ BLIS_NUM_MODELS ] =
|
||||
|
||||
"default",
|
||||
|
||||
"Turin",
|
||||
"Turin Dense",
|
||||
|
||||
"Genoa",
|
||||
"Bergamo",
|
||||
"Genoa-X",
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -898,9 +898,22 @@ err_t bli_check_valid_model_id( arch_t arch_id, model_t model_id )
|
||||
// Model ranges are specified in bli_type_defs.h
|
||||
err_t e_val = BLIS_INVALID_MODEL_ID;
|
||||
|
||||
if ( arch_id == BLIS_ARCH_ZEN5 )
|
||||
{
|
||||
if ( ( gint_t )model_id >= BLIS_MODEL_TURIN &&
|
||||
( gint_t )model_id <= BLIS_MODEL_TURIN_DENSE )
|
||||
{
|
||||
e_val = BLIS_SUCCESS;
|
||||
}
|
||||
if ( ( gint_t )model_id >= BLIS_MODEL_GENOA &&
|
||||
( gint_t )model_id <= BLIS_MODEL_GENOA_X )
|
||||
{
|
||||
e_val = BLIS_SUCCESS;
|
||||
}
|
||||
}
|
||||
if ( arch_id == BLIS_ARCH_ZEN4 )
|
||||
{
|
||||
if ( ( gint_t )model_id >= BLIS_MODEL_GENOA &&
|
||||
if ( ( gint_t )model_id >= BLIS_MODEL_TURIN &&
|
||||
( gint_t )model_id <= BLIS_MODEL_GENOA_X )
|
||||
{
|
||||
e_val = BLIS_SUCCESS;
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019, Dave Love, University of Manchester
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -185,10 +185,11 @@ arch_t bli_cpuid_query_id( void )
|
||||
}
|
||||
else if ( vendor == VENDOR_AMD )
|
||||
{
|
||||
|
||||
// Check for each AMD configuration that is enabled, check for that
|
||||
// microarchitecture. We check from most recent to most dated.
|
||||
#ifdef BLIS_CONFIG_ZEN4
|
||||
if ( bli_cpuid_is_zen5( family, model, features ) )
|
||||
return BLIS_ARCH_ZEN4;
|
||||
if ( bli_cpuid_is_zen4( family, model, features ) )
|
||||
return BLIS_ARCH_ZEN4;
|
||||
// Fallback test for future AMD processors
|
||||
@@ -240,6 +241,22 @@ model_t bli_cpuid_query_model_id( arch_t arch_id )
|
||||
// Set default for architectures where separate models haven't been defined.
|
||||
model_t cpuid_model = BLIS_MODEL_DEFAULT;
|
||||
|
||||
#ifdef BLIS_CONFIG_ZEN5
|
||||
if (arch_id == BLIS_ARCH_ZEN5)
|
||||
{
|
||||
// Call the CPUID instruction and parse its results into a family id,
|
||||
// model id, and a feature bit field. The return value encodes the
|
||||
// vendor.
|
||||
|
||||
uint32_t __attribute__ ((unused)) vendor;
|
||||
uint32_t family, model, features;
|
||||
|
||||
vendor = bli_cpuid_query( &family, &model, &features );
|
||||
|
||||
// Check CPU model.
|
||||
cpuid_model = bli_cpuid_get_zen5_cpuid_model( family, model, features );
|
||||
}
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_ZEN4
|
||||
if (arch_id == BLIS_ARCH_ZEN4)
|
||||
{
|
||||
@@ -252,8 +269,16 @@ model_t bli_cpuid_query_model_id( arch_t arch_id )
|
||||
|
||||
vendor = bli_cpuid_query( &family, &model, &features );
|
||||
|
||||
// Check CPU model.
|
||||
cpuid_model = bli_cpuid_get_zen4_cpuid_model( family, model, features );
|
||||
// For now, zen4 code path is also used for zen5 so check
|
||||
// for zen5 models here too.
|
||||
if ( family == 0x19 )
|
||||
{
|
||||
cpuid_model = bli_cpuid_get_zen4_cpuid_model( family, model, features );
|
||||
}
|
||||
else if ( family == 0x1A )
|
||||
{
|
||||
cpuid_model = bli_cpuid_get_zen5_cpuid_model( family, model, features );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_ZEN3
|
||||
@@ -386,6 +411,60 @@ bool bli_cpuid_is_penryn
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
bool bli_cpuid_is_zen5
|
||||
(
|
||||
uint32_t family,
|
||||
uint32_t model,
|
||||
uint32_t features
|
||||
)
|
||||
{
|
||||
// Check for expected CPU features.
|
||||
const uint32_t expected = FEATURE_SSE3 |
|
||||
FEATURE_SSSE3 |
|
||||
FEATURE_SSE41 |
|
||||
FEATURE_SSE42 |
|
||||
FEATURE_AVX |
|
||||
FEATURE_FMA3 |
|
||||
FEATURE_AVX2 |
|
||||
FEATURE_AVX512F |
|
||||
FEATURE_AVX512DQ |
|
||||
FEATURE_AVX512CD |
|
||||
FEATURE_AVX512BW |
|
||||
FEATURE_AVX512VL |
|
||||
FEATURE_AVX512VNNI |
|
||||
FEATURE_AVX512BF16 |
|
||||
FEATURE_MOVDIRI |
|
||||
FEATURE_MOVDIR64B |
|
||||
FEATURE_AVX512VP2INTERSECT |
|
||||
FEATURE_AVXVNNI;
|
||||
|
||||
if ( !bli_cpuid_has_features( features, expected ) ) return FALSE;
|
||||
|
||||
// For zen5 the family id is 0x1A
|
||||
if ( family != 0x1A ) return FALSE;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
model_t bli_cpuid_get_zen5_cpuid_model
|
||||
(
|
||||
uint32_t family,
|
||||
uint32_t model,
|
||||
uint32_t features
|
||||
)
|
||||
{
|
||||
// Look at model of CPU and set cpuid_model appropriately.
|
||||
// For Zen5, the default is Turin.
|
||||
model_t cpuid_model = BLIS_MODEL_TURIN;
|
||||
if ( family == 0x1A )
|
||||
{
|
||||
if ( 0x10 <= model && model <= 0x1f ) // Turin Dense
|
||||
{
|
||||
cpuid_model = BLIS_MODEL_TURIN_DENSE;
|
||||
}
|
||||
}
|
||||
return cpuid_model;
|
||||
}
|
||||
|
||||
bool bli_cpuid_is_zen4
|
||||
(
|
||||
uint32_t family,
|
||||
@@ -438,6 +517,14 @@ model_t bli_cpuid_get_zen4_cpuid_model
|
||||
{
|
||||
cpuid_model = BLIS_MODEL_BERGAMO;
|
||||
}
|
||||
else
|
||||
{
|
||||
uint32_t l3_cache_size = bli_cpuid_query_l3_cache_size();
|
||||
if ( l3_cache_size > 393216 )
|
||||
{
|
||||
cpuid_model = BLIS_MODEL_GENOA_X;
|
||||
}
|
||||
}
|
||||
}
|
||||
return cpuid_model;
|
||||
}
|
||||
@@ -1060,7 +1147,7 @@ model_t bli_cpuid_query_model_id( arch_t arch_id )
|
||||
|
||||
Copyright (C) 2017, The University of Texas at Austin
|
||||
Copyright (C) 2017, Devin Matthews
|
||||
Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -1092,29 +1179,33 @@ model_t bli_cpuid_query_model_id( arch_t arch_id )
|
||||
|
||||
// Bit masks that are tested against raw register values. The trailing
// comment on each enumerator names the cpuid leaf (input registers) and the
// output register bit(s) selected by the mask; the XGETBV_MASK_* entries
// select bits of xcr0.
//
// Each enumerator is defined exactly once. MOVDIRI, MOVDIR64B,
// AVX512VP2INTERSECT and AVXVNNI are the masks added for Zen5 detection.
enum
{
	                                              // input register(s)      output register
	FEATURE_MASK_SSE3               = (1u<< 0),  // cpuid[eax=1]           :ecx[0]
	FEATURE_MASK_SSSE3              = (1u<< 9),  // cpuid[eax=1]           :ecx[9]
	FEATURE_MASK_SSE41              = (1u<<19),  // cpuid[eax=1]           :ecx[19]
	FEATURE_MASK_SSE42              = (1u<<20),  // cpuid[eax=1]           :ecx[20]
	FEATURE_MASK_AVX                = (1u<<28),  // cpuid[eax=1]           :ecx[28]
	FEATURE_MASK_AVX2               = (1u<< 5),  // cpuid[eax=7,ecx=0]     :ebx[5]
	FEATURE_MASK_FMA3               = (1u<<12),  // cpuid[eax=1]           :ecx[12]
	FEATURE_MASK_FMA4               = (1u<<16),  // cpuid[eax=0x80000001]  :ecx[16]
	FEATURE_MASK_AVX512F            = (1u<<16),  // cpuid[eax=7,ecx=0]     :ebx[16]
	FEATURE_MASK_AVX512DQ           = (1u<<17),  // cpuid[eax=7,ecx=0]     :ebx[17]
	FEATURE_MASK_AVX512PF           = (1u<<26),  // cpuid[eax=7,ecx=0]     :ebx[26]
	FEATURE_MASK_AVX512ER           = (1u<<27),  // cpuid[eax=7,ecx=0]     :ebx[27]
	FEATURE_MASK_AVX512CD           = (1u<<28),  // cpuid[eax=7,ecx=0]     :ebx[28]
	FEATURE_MASK_AVX512BW           = (1u<<30),  // cpuid[eax=7,ecx=0]     :ebx[30]
	FEATURE_MASK_AVX512VL           = (1u<<31),  // cpuid[eax=7,ecx=0]     :ebx[31]
	FEATURE_MASK_AVX512VNNI         = (1u<<11),  // cpuid[eax=7,ecx=0]     :ecx[11]
	FEATURE_MASK_MOVDIRI            = (1u<<27),  // cpuid[eax=7,ecx=0]     :ecx[27]
	FEATURE_MASK_MOVDIR64B          = (1u<<28),  // cpuid[eax=7,ecx=0]     :ecx[28]
	FEATURE_MASK_AVX512VP2INTERSECT = (1u<< 8),  // cpuid[eax=7,ecx=0]     :edx[8]
	FEATURE_MASK_AVXVNNI            = (1u<< 4),  // cpuid[eax=7,ecx=1]     :eax[4]
	FEATURE_MASK_AVX512BF16         = (1u<< 5),  // cpuid[eax=7,ecx=1]     :eax[5]
	FEATURE_MASK_XGETBV             = (1u<<26)|
	                                  (1u<<27),  // cpuid[eax=1]           :ecx[27:26]
	XGETBV_MASK_XMM                 = 0x02u,     // xcr0[1]
	XGETBV_MASK_YMM                 = 0x04u,     // xcr0[2]
	XGETBV_MASK_ZMM                 = 0xe0u      // xcr0[7:5]
};
|
||||
|
||||
|
||||
@@ -1178,6 +1269,10 @@ uint32_t bli_cpuid_query
|
||||
if ( bli_cpuid_has_features( ebx, FEATURE_MASK_AVX512VL ) ) *features |= FEATURE_AVX512VL;
|
||||
|
||||
if ( bli_cpuid_has_features( ecx, FEATURE_MASK_AVX512VNNI ) ) *features |= FEATURE_AVX512VNNI;
|
||||
if ( bli_cpuid_has_features( ecx, FEATURE_MASK_MOVDIRI ) ) *features |= FEATURE_MOVDIRI;
|
||||
if ( bli_cpuid_has_features( ecx, FEATURE_MASK_MOVDIR64B ) ) *features |= FEATURE_MOVDIR64B;
|
||||
|
||||
if ( bli_cpuid_has_features( edx, FEATURE_MASK_AVX512VP2INTERSECT ) ) *features |= FEATURE_AVX512VP2INTERSECT;
|
||||
|
||||
// This is actually a macro that modifies the last four operands,
|
||||
// hence why they are not passed by address.
|
||||
@@ -1186,6 +1281,7 @@ uint32_t bli_cpuid_query
|
||||
// 5th feature bit of the returned value
|
||||
__cpuid_count( 7, 1, eax, ebx, ecx, edx );
|
||||
|
||||
if ( bli_cpuid_has_features( eax, FEATURE_MASK_AVXVNNI ) ) *features |= FEATURE_AVXVNNI;
|
||||
if ( bli_cpuid_has_features( eax, FEATURE_MASK_AVX512BF16 ) ) *features |= FEATURE_AVX512BF16;
|
||||
|
||||
}
|
||||
@@ -1306,8 +1402,8 @@ uint32_t bli_cpuid_query
|
||||
// only if the xcr[7:5] bits are set. If they are not set, then
|
||||
// clear all feature bits related to AVX-512.
|
||||
if ( !bli_cpuid_has_features( eax, XGETBV_MASK_XMM |
|
||||
XGETBV_MASK_YMM |
|
||||
XGETBV_MASK_ZMM ) )
|
||||
XGETBV_MASK_YMM |
|
||||
XGETBV_MASK_ZMM ) )
|
||||
{
|
||||
*features &= ~( FEATURE_AVX512F |
|
||||
FEATURE_AVX512DQ |
|
||||
@@ -1322,7 +1418,7 @@ uint32_t bli_cpuid_query
|
||||
// only if the xcr[2] bit is set. If it is not set, then
|
||||
// clear all feature bits related to AVX.
|
||||
if ( !bli_cpuid_has_features( eax, XGETBV_MASK_XMM |
|
||||
XGETBV_MASK_YMM ) )
|
||||
XGETBV_MASK_YMM ) )
|
||||
{
|
||||
*features &= ~( FEATURE_AVX |
|
||||
FEATURE_AVX2 |
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -68,6 +68,7 @@ bool bli_cpuid_is_sandybridge( uint32_t family, uint32_t model, uint32_t feature
|
||||
bool bli_cpuid_is_penryn( uint32_t family, uint32_t model, uint32_t features );
|
||||
|
||||
// AMD
|
||||
bool bli_cpuid_is_zen5( uint32_t family, uint32_t model, uint32_t features );
|
||||
bool bli_cpuid_is_zen4( uint32_t family, uint32_t model, uint32_t features );
|
||||
bool bli_cpuid_is_avx512_fallback( uint32_t family, uint32_t model, uint32_t features );
|
||||
bool bli_cpuid_is_zen3( uint32_t family, uint32_t model, uint32_t features );
|
||||
@@ -78,6 +79,7 @@ bool bli_cpuid_is_steamroller( uint32_t family, uint32_t model, uint32_t feature
|
||||
bool bli_cpuid_is_piledriver( uint32_t family, uint32_t model, uint32_t features );
|
||||
bool bli_cpuid_is_bulldozer( uint32_t family, uint32_t model, uint32_t features );
|
||||
|
||||
model_t bli_cpuid_get_zen5_cpuid_model( uint32_t family, uint32_t model, uint32_t features );
|
||||
model_t bli_cpuid_get_zen4_cpuid_model( uint32_t family, uint32_t model, uint32_t features );
|
||||
model_t bli_cpuid_get_zen3_cpuid_model( uint32_t family, uint32_t model, uint32_t features );
|
||||
|
||||
@@ -167,23 +169,27 @@ enum
|
||||
};
|
||||
// Feature flags that are OR-ed together to form the `features` bit field.
// Every flag occupies its own bit, so any combination can be tested with a
// single mask.
//
// Each enumerator is defined exactly once. AVXVNNI, AVX512VP2INTERSECT,
// MOVDIRI and MOVDIR64B are the flags added for Zen5 detection.
enum
{
	FEATURE_SSE3               = 0x0001,
	FEATURE_SSSE3              = 0x0002,
	FEATURE_SSE41              = 0x0004,
	FEATURE_SSE42              = 0x0008,
	FEATURE_AVX                = 0x0010,
	FEATURE_AVX2               = 0x0020,
	FEATURE_FMA3               = 0x0040,
	FEATURE_FMA4               = 0x0080,
	FEATURE_AVX512F            = 0x0100,
	FEATURE_AVX512DQ           = 0x0200,
	FEATURE_AVX512PF           = 0x0400,
	FEATURE_AVX512ER           = 0x0800,
	FEATURE_AVX512CD           = 0x1000,
	FEATURE_AVX512BW           = 0x2000,
	FEATURE_AVX512VL           = 0x4000,
	FEATURE_AVX512VNNI         = 0x8000,
	FEATURE_AVX512BF16         = 0x10000,
	FEATURE_AVXVNNI            = 0x20000,
	FEATURE_AVX512VP2INTERSECT = 0x40000,
	FEATURE_MOVDIRI            = 0x80000,
	FEATURE_MOVDIR64B          = 0x100000
};
|
||||
|
||||
#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM)
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -150,6 +150,10 @@ gint_t bli_env_get_var_arch_type( const char* env, gint_t fallback )
|
||||
r_val = BLIS_ARCH_PENRYN;
|
||||
}
|
||||
// AMD
|
||||
else if (strcmp(str, "zen5") == 0)
|
||||
{
|
||||
r_val = BLIS_ARCH_ZEN4;
|
||||
}
|
||||
else if (strcmp(str, "zen4") == 0)
|
||||
{
|
||||
r_val = BLIS_ARCH_ZEN4;
|
||||
@@ -313,7 +317,17 @@ gint_t bli_env_get_var_model_type( const char* env, gint_t fallback )
|
||||
str[i] = tolower(str[i]);
|
||||
}
|
||||
// AMD
|
||||
if (strcmp(str, "genoa") == 0)
|
||||
if (strcmp(str, "turin") == 0)
|
||||
{
|
||||
r_val = BLIS_MODEL_TURIN;
|
||||
}
|
||||
else if ((strcmp(str, "turin_dense") == 0) ||
|
||||
(strcmp(str, "turin-dense") == 0) ||
|
||||
(strcmp(str, "turindense") == 0))
|
||||
{
|
||||
r_val = BLIS_MODEL_TURIN_DENSE;
|
||||
}
|
||||
else if (strcmp(str, "genoa") == 0)
|
||||
{
|
||||
r_val = BLIS_MODEL_GENOA;
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -107,6 +107,11 @@ void bli_gks_init( void )
|
||||
#endif
|
||||
|
||||
// AMD architectures
|
||||
#ifdef BLIS_CONFIG_ZEN5
|
||||
bli_gks_register_cntx( BLIS_ARCH_ZEN4, bli_cntx_init_zen4,
|
||||
bli_cntx_init_zen4_ref,
|
||||
bli_cntx_init_zen4_ind );
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_ZEN4
|
||||
bli_gks_register_cntx( BLIS_ARCH_ZEN4, bli_cntx_init_zen4,
|
||||
bli_cntx_init_zen4_ref,
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2016, Hewlett Packard Enterprise Development LP
|
||||
Copyright (C) 2019 - 2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -62,6 +62,9 @@ CNTX_INIT_PROTS( penryn )
|
||||
#endif
|
||||
|
||||
// -- AMD64 architectures --
|
||||
#ifdef BLIS_CONFIG_ZEN5
|
||||
CNTX_INIT_PROTS( zen5 )
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_ZEN4
|
||||
CNTX_INIT_PROTS( zen4 )
|
||||
#endif
|
||||
@@ -177,6 +180,9 @@ CNTX_INIT_PROTS( generic )
|
||||
|
||||
// -- AMD64 architectures --
|
||||
|
||||
#ifdef BLIS_FAMILY_ZEN5
|
||||
#include "bli_family_zen5.h"
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_ZEN4
|
||||
#include "bli_family_zen4.h"
|
||||
#endif
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2016, Hewlett Packard Enterprise Development LP
|
||||
Copyright (C) 2021 - 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2021 - 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -1023,6 +1023,7 @@ typedef enum
|
||||
BLIS_ARCH_PENRYN,
|
||||
|
||||
// AMD
|
||||
BLIS_ARCH_ZEN5,
|
||||
BLIS_ARCH_ZEN4,
|
||||
BLIS_ARCH_ZEN3,
|
||||
BLIS_ARCH_ZEN2,
|
||||
@@ -1063,6 +1064,10 @@ typedef enum
|
||||
// Default model
|
||||
BLIS_MODEL_DEFAULT,
|
||||
|
||||
// AMD Zen5
|
||||
BLIS_MODEL_TURIN,
|
||||
BLIS_MODEL_TURIN_DENSE,
|
||||
|
||||
// AMD Zen4
|
||||
BLIS_MODEL_GENOA,
|
||||
BLIS_MODEL_BERGAMO,
|
||||
|
||||
Reference in New Issue
Block a user