BLIS: zen5 cpuid and arch changes

Implement initial support for Zen5 systems:
- Detect the AVXVNNI, AVX512VP2INTERSECT, MOVDIRI and MOVDIR64B
  instruction-set features that are new in Zen5.
- Assume for now that Zen5 will use the Zen4 code path. BLIS_ARCH_TYPE=zen5
  will therefore function as an alias for BLIS_ARCH_TYPE=zen4, but the
  distinct hardware models (Turin, Turin Dense) will still be detected.

AMD-Internal: [CPUPL-3518]
Change-Id: I00fb413d743f152a5412ace3e740df1fd39a1600
This commit is contained in:
Edward Smyth
2024-01-17 09:22:43 -05:00
parent 864170f5cb
commit f93ccb0cea
9 changed files with 243 additions and 62 deletions

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2022 - 2023, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -49,8 +49,7 @@
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 6144, 4002, 4080, 2004 ); \
\
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 5, 5, -1, -1 ); \
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); \
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 );
#define BLI_CNTX_DEFAULT_BLKSZ_LIST_BERGAMO(blkszs) \
/* s d c z */ \
@@ -62,8 +61,33 @@
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 6144, 3600, 4080, 2004 ); \
\
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 5, 5, -1, -1 ); \
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); \
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 );
/* Default reference block sizes for the Turin model.
   Starting point for Turin, copied from Genoa.
   The macro expands to a sequence of statements that initialise the
   MR, NR, MC, KC, NC, AF and DF entries of the blkszs array passed in.
   NOTE: the expansion is several separate statements (not wrapped in
   do { ... } while (0)), so it should only be invoked inside a braced
   block. */
#define BLI_CNTX_DEFAULT_BLKSZ_LIST_TURIN(blkszs) \
/* s d c z */ \
bli_blksz_init_easy( &blkszs[ BLIS_MR ], 32, 32, 3, 12 ); \
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 6, 8, 4 ); \
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 512, 128, 144, 60 ); \
bli_blksz_init ( &blkszs[ BLIS_KC ], 480, 512, 256, 512, \
480, 320, 256, 160 ); \
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 6144, 4002, 4080, 2004 ); \
\
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 5, 5, -1, -1 ); \
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 );
/* Default reference block sizes for the Turin Dense model.
   Starting point for Turin Dense, copied from Bergamo.
   The macro expands to a sequence of statements that initialise the
   MR, NR, MC, KC, NC, AF and DF entries of the blkszs array passed in.
   NOTE: the expansion is several separate statements (not wrapped in
   do { ... } while (0)), so it should only be invoked inside a braced
   block. */
#define BLI_CNTX_DEFAULT_BLKSZ_LIST_TURIN_DENSE(blkszs) \
/* s d c z */ \
bli_blksz_init_easy( &blkszs[ BLIS_MR ], 32, 32, 3, 12 ); \
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 6, 8, 4 ); \
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 512, 64, 144, 60 ); \
bli_blksz_init ( &blkszs[ BLIS_KC ], 480, 512, 256, 512, \
480, 320, 256, 160 ); \
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 6144, 3600, 4080, 2004 ); \
\
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 5, 5, -1, -1 ); \
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 );
void bli_cntx_init_zen4( cntx_t* cntx )
{
@@ -212,7 +236,15 @@ void bli_cntx_init_zen4( cntx_t* cntx )
// These are reference block sizes and may be overridden based on
// number of threads used at runtime.
if ( bli_init_model_query_id() == BLIS_MODEL_BERGAMO )
if ( bli_init_model_query_id() == BLIS_MODEL_TURIN_DENSE )
{
BLI_CNTX_DEFAULT_BLKSZ_LIST_TURIN_DENSE(blkszs);
}
else if ( bli_init_model_query_id() == BLIS_MODEL_TURIN )
{
BLI_CNTX_DEFAULT_BLKSZ_LIST_TURIN(blkszs);
}
else if ( bli_init_model_query_id() == BLIS_MODEL_BERGAMO )
{
BLI_CNTX_DEFAULT_BLKSZ_LIST_BERGAMO(blkszs);
}

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -569,6 +569,7 @@ static char* config_name[ BLIS_NUM_ARCHS ] =
"sandybridge",
"penryn",
"zen5",
"zen4",
"zen3",
"zen2",
@@ -609,6 +610,9 @@ static char* model_name[ BLIS_NUM_MODELS ] =
"default",
"Turin",
"Turin Dense",
"Genoa",
"Bergamo",
"Genoa-X",

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -898,9 +898,22 @@ err_t bli_check_valid_model_id( arch_t arch_id, model_t model_id )
// Model ranges are specified in bli_type_defs.h
err_t e_val = BLIS_INVALID_MODEL_ID;
if ( arch_id == BLIS_ARCH_ZEN5 )
{
if ( ( gint_t )model_id >= BLIS_MODEL_TURIN &&
( gint_t )model_id <= BLIS_MODEL_TURIN_DENSE )
{
e_val = BLIS_SUCCESS;
}
if ( ( gint_t )model_id >= BLIS_MODEL_GENOA &&
( gint_t )model_id <= BLIS_MODEL_GENOA_X )
{
e_val = BLIS_SUCCESS;
}
}
if ( arch_id == BLIS_ARCH_ZEN4 )
{
if ( ( gint_t )model_id >= BLIS_MODEL_GENOA &&
if ( ( gint_t )model_id >= BLIS_MODEL_TURIN &&
( gint_t )model_id <= BLIS_MODEL_GENOA_X )
{
e_val = BLIS_SUCCESS;

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019, Dave Love, University of Manchester
Redistribution and use in source and binary forms, with or without
@@ -185,10 +185,11 @@ arch_t bli_cpuid_query_id( void )
}
else if ( vendor == VENDOR_AMD )
{
// For each AMD configuration that is enabled, check for that
// microarchitecture. We check from most recent to most dated.
#ifdef BLIS_CONFIG_ZEN4
if ( bli_cpuid_is_zen5( family, model, features ) )
return BLIS_ARCH_ZEN4;
if ( bli_cpuid_is_zen4( family, model, features ) )
return BLIS_ARCH_ZEN4;
// Fallback test for future AMD processors
@@ -240,6 +241,22 @@ model_t bli_cpuid_query_model_id( arch_t arch_id )
// Set default for architectures where separate models haven't been defined.
model_t cpuid_model = BLIS_MODEL_DEFAULT;
#ifdef BLIS_CONFIG_ZEN5
if (arch_id == BLIS_ARCH_ZEN5)
{
// Call the CPUID instruction and parse its results into a family id,
// model id, and a feature bit field. The return value encodes the
// vendor.
uint32_t __attribute__ ((unused)) vendor;
uint32_t family, model, features;
vendor = bli_cpuid_query( &family, &model, &features );
// Check CPU model.
cpuid_model = bli_cpuid_get_zen5_cpuid_model( family, model, features );
}
#endif
#ifdef BLIS_CONFIG_ZEN4
if (arch_id == BLIS_ARCH_ZEN4)
{
@@ -252,8 +269,16 @@ model_t bli_cpuid_query_model_id( arch_t arch_id )
vendor = bli_cpuid_query( &family, &model, &features );
// Check CPU model.
cpuid_model = bli_cpuid_get_zen4_cpuid_model( family, model, features );
// For now, zen4 code path is also used for zen5 so check
// for zen5 models here too.
if ( family == 0x19 )
{
cpuid_model = bli_cpuid_get_zen4_cpuid_model( family, model, features );
}
else if ( family == 0x1A )
{
cpuid_model = bli_cpuid_get_zen5_cpuid_model( family, model, features );
}
}
#endif
#ifdef BLIS_CONFIG_ZEN3
@@ -386,6 +411,60 @@ bool bli_cpuid_is_penryn
}
// -----------------------------------------------------------------------------
// Reports whether the CPUID results describe a Zen5 processor: every
// feature in the required set must be present and the family id must be
// 0x1A. The model argument is accepted but not consulted.
bool bli_cpuid_is_zen5
     (
       uint32_t family,
       uint32_t model,
       uint32_t features
     )
{
	// Baseline SSE/AVX/FMA features.
	const uint32_t required_simd   = FEATURE_SSE3  | FEATURE_SSSE3 |
	                                 FEATURE_SSE41 | FEATURE_SSE42 |
	                                 FEATURE_AVX   | FEATURE_FMA3  |
	                                 FEATURE_AVX2;

	// AVX-512 feature subsets.
	const uint32_t required_avx512 = FEATURE_AVX512F    | FEATURE_AVX512DQ   |
	                                 FEATURE_AVX512CD   | FEATURE_AVX512BW   |
	                                 FEATURE_AVX512VL   | FEATURE_AVX512VNNI |
	                                 FEATURE_AVX512BF16 |
	                                 FEATURE_AVX512VP2INTERSECT;

	// Remaining features that this check also requires.
	const uint32_t required_other  = FEATURE_MOVDIRI | FEATURE_MOVDIR64B |
	                                 FEATURE_AVXVNNI;

	const uint32_t required = required_simd | required_avx512 | required_other;

	// The feature test is evaluated first; the family id (0x1A for zen5)
	// is only examined when all required features are present.
	if ( bli_cpuid_has_features( features, required ) && family == 0x1A )
	{
		return TRUE;
	}

	return FALSE;
}
// Translates the CPUID family/model pair into a BLIS model id for Zen5.
// Models 0x10 through 0x1f of family 0x1A are reported as Turin Dense;
// every other input yields the default, Turin. The features argument is
// accepted but not consulted.
model_t bli_cpuid_get_zen5_cpuid_model
     (
       uint32_t family,
       uint32_t model,
       uint32_t features
     )
{
	// Turin Dense occupies the model range [0x10, 0x1f] of family 0x1A.
	if ( family == 0x1A && model >= 0x10 && model <= 0x1f )
	{
		return BLIS_MODEL_TURIN_DENSE;
	}

	// For Zen5, the default is Turin.
	return BLIS_MODEL_TURIN;
}
bool bli_cpuid_is_zen4
(
uint32_t family,
@@ -438,6 +517,14 @@ model_t bli_cpuid_get_zen4_cpuid_model
{
cpuid_model = BLIS_MODEL_BERGAMO;
}
else
{
uint32_t l3_cache_size = bli_cpuid_query_l3_cache_size();
if ( l3_cache_size > 393216 )
{
cpuid_model = BLIS_MODEL_GENOA_X;
}
}
}
return cpuid_model;
}
@@ -1060,7 +1147,7 @@ model_t bli_cpuid_query_model_id( arch_t arch_id )
Copyright (C) 2017, The University of Texas at Austin
Copyright (C) 2017, Devin Matthews
Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -1092,29 +1179,33 @@ model_t bli_cpuid_query_model_id( arch_t arch_id )
enum
{
// input register(s) output register
FEATURE_MASK_SSE3 = (1u<< 0), // cpuid[eax=1] :ecx[0]
FEATURE_MASK_SSSE3 = (1u<< 9), // cpuid[eax=1] :ecx[9]
FEATURE_MASK_SSE41 = (1u<<19), // cpuid[eax=1] :ecx[19]
FEATURE_MASK_SSE42 = (1u<<20), // cpuid[eax=1] :ecx[20]
FEATURE_MASK_AVX = (1u<<28), // cpuid[eax=1] :ecx[28]
FEATURE_MASK_AVX2 = (1u<< 5), // cpuid[eax=7,ecx=0] :ebx[5]
FEATURE_MASK_FMA3 = (1u<<12), // cpuid[eax=1] :ecx[12]
FEATURE_MASK_FMA4 = (1u<<16), // cpuid[eax=0x80000001]:ecx[16]
FEATURE_MASK_AVX512F = (1u<<16), // cpuid[eax=7,ecx=0] :ebx[16]
FEATURE_MASK_AVX512DQ = (1u<<17), // cpuid[eax=7,ecx=0] :ebx[17]
FEATURE_MASK_AVX512PF = (1u<<26), // cpuid[eax=7,ecx=0] :ebx[26]
FEATURE_MASK_AVX512ER = (1u<<27), // cpuid[eax=7,ecx=0] :ebx[27]
FEATURE_MASK_AVX512CD = (1u<<28), // cpuid[eax=7,ecx=0] :ebx[28]
FEATURE_MASK_AVX512BW = (1u<<30), // cpuid[eax=7,ecx=0] :ebx[30]
FEATURE_MASK_AVX512VL = (1u<<31), // cpuid[eax=7,ecx=0] :ebx[31]
FEATURE_MASK_AVX512VNNI = (1u<<11), // cpuid[eax=7,ecx=0] :ecx[11]
FEATURE_MASK_AVX512BF16 = (1u<< 5), // cpuid[eax=7,ecx=1] :eax[5]
FEATURE_MASK_XGETBV = (1u<<26)|
(1u<<27), // cpuid[eax=1] :ecx[27:26]
XGETBV_MASK_XMM = 0x02u, // xcr0[1]
XGETBV_MASK_YMM = 0x04u, // xcr0[2]
XGETBV_MASK_ZMM = 0xe0u // xcr0[7:5]
// input register(s) output register
FEATURE_MASK_SSE3 = (1u<< 0), // cpuid[eax=1] :ecx[0]
FEATURE_MASK_SSSE3 = (1u<< 9), // cpuid[eax=1] :ecx[9]
FEATURE_MASK_SSE41 = (1u<<19), // cpuid[eax=1] :ecx[19]
FEATURE_MASK_SSE42 = (1u<<20), // cpuid[eax=1] :ecx[20]
FEATURE_MASK_AVX = (1u<<28), // cpuid[eax=1] :ecx[28]
FEATURE_MASK_AVX2 = (1u<< 5), // cpuid[eax=7,ecx=0] :ebx[5]
FEATURE_MASK_FMA3 = (1u<<12), // cpuid[eax=1] :ecx[12]
FEATURE_MASK_FMA4 = (1u<<16), // cpuid[eax=0x80000001] :ecx[16]
FEATURE_MASK_AVX512F = (1u<<16), // cpuid[eax=7,ecx=0] :ebx[16]
FEATURE_MASK_AVX512DQ = (1u<<17), // cpuid[eax=7,ecx=0] :ebx[17]
FEATURE_MASK_AVX512PF = (1u<<26), // cpuid[eax=7,ecx=0] :ebx[26]
FEATURE_MASK_AVX512ER = (1u<<27), // cpuid[eax=7,ecx=0] :ebx[27]
FEATURE_MASK_AVX512CD = (1u<<28), // cpuid[eax=7,ecx=0] :ebx[28]
FEATURE_MASK_AVX512BW = (1u<<30), // cpuid[eax=7,ecx=0] :ebx[30]
FEATURE_MASK_AVX512VL = (1u<<31), // cpuid[eax=7,ecx=0] :ebx[31]
FEATURE_MASK_AVX512VNNI = (1u<<11), // cpuid[eax=7,ecx=0] :ecx[11]
FEATURE_MASK_MOVDIRI = (1u<<27), // cpuid[eax=7,ecx=0] :ecx[27]
FEATURE_MASK_MOVDIR64B = (1u<<28), // cpuid[eax=7,ecx=0] :ecx[28]
FEATURE_MASK_AVX512VP2INTERSECT = (1u<<8), // cpuid[eax=7,ecx=0] :edx[8]
FEATURE_MASK_AVXVNNI = (1u<< 4), // cpuid[eax=7,ecx=1] :eax[4]
FEATURE_MASK_AVX512BF16 = (1u<< 5), // cpuid[eax=7,ecx=1] :eax[5]
FEATURE_MASK_XGETBV = (1u<<26)|
(1u<<27), // cpuid[eax=1] :ecx[27:26]
XGETBV_MASK_XMM = 0x02u, // xcr0[1]
XGETBV_MASK_YMM = 0x04u, // xcr0[2]
XGETBV_MASK_ZMM = 0xe0u // xcr0[7:5]
};
@@ -1178,6 +1269,10 @@ uint32_t bli_cpuid_query
if ( bli_cpuid_has_features( ebx, FEATURE_MASK_AVX512VL ) ) *features |= FEATURE_AVX512VL;
if ( bli_cpuid_has_features( ecx, FEATURE_MASK_AVX512VNNI ) ) *features |= FEATURE_AVX512VNNI;
if ( bli_cpuid_has_features( ecx, FEATURE_MASK_MOVDIRI ) ) *features |= FEATURE_MOVDIRI;
if ( bli_cpuid_has_features( ecx, FEATURE_MASK_MOVDIR64B ) ) *features |= FEATURE_MOVDIR64B;
if ( bli_cpuid_has_features( edx, FEATURE_MASK_AVX512VP2INTERSECT ) ) *features |= FEATURE_AVX512VP2INTERSECT;
// This is actually a macro that modifies the last four operands,
// hence why they are not passed by address.
@@ -1186,6 +1281,7 @@ uint32_t bli_cpuid_query
// 5th feature bit of the returned value
__cpuid_count( 7, 1, eax, ebx, ecx, edx );
if ( bli_cpuid_has_features( eax, FEATURE_MASK_AVXVNNI ) ) *features |= FEATURE_AVXVNNI;
if ( bli_cpuid_has_features( eax, FEATURE_MASK_AVX512BF16 ) ) *features |= FEATURE_AVX512BF16;
}
@@ -1306,8 +1402,8 @@ uint32_t bli_cpuid_query
// only if the xcr[7:5] bits are set. If they are not set, then
// clear all feature bits related to AVX-512.
if ( !bli_cpuid_has_features( eax, XGETBV_MASK_XMM |
XGETBV_MASK_YMM |
XGETBV_MASK_ZMM ) )
XGETBV_MASK_YMM |
XGETBV_MASK_ZMM ) )
{
*features &= ~( FEATURE_AVX512F |
FEATURE_AVX512DQ |
@@ -1322,7 +1418,7 @@ uint32_t bli_cpuid_query
// only if the xcr[2] bit is set. If it is not set, then
// clear all feature bits related to AVX.
if ( !bli_cpuid_has_features( eax, XGETBV_MASK_XMM |
XGETBV_MASK_YMM ) )
XGETBV_MASK_YMM ) )
{
*features &= ~( FEATURE_AVX |
FEATURE_AVX2 |

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -68,6 +68,7 @@ bool bli_cpuid_is_sandybridge( uint32_t family, uint32_t model, uint32_t feature
bool bli_cpuid_is_penryn( uint32_t family, uint32_t model, uint32_t features );
// AMD
bool bli_cpuid_is_zen5( uint32_t family, uint32_t model, uint32_t features );
bool bli_cpuid_is_zen4( uint32_t family, uint32_t model, uint32_t features );
bool bli_cpuid_is_avx512_fallback( uint32_t family, uint32_t model, uint32_t features );
bool bli_cpuid_is_zen3( uint32_t family, uint32_t model, uint32_t features );
@@ -78,6 +79,7 @@ bool bli_cpuid_is_steamroller( uint32_t family, uint32_t model, uint32_t feature
bool bli_cpuid_is_piledriver( uint32_t family, uint32_t model, uint32_t features );
bool bli_cpuid_is_bulldozer( uint32_t family, uint32_t model, uint32_t features );
model_t bli_cpuid_get_zen5_cpuid_model( uint32_t family, uint32_t model, uint32_t features );
model_t bli_cpuid_get_zen4_cpuid_model( uint32_t family, uint32_t model, uint32_t features );
model_t bli_cpuid_get_zen3_cpuid_model( uint32_t family, uint32_t model, uint32_t features );
@@ -167,23 +169,27 @@ enum
};
enum
{
FEATURE_SSE3 = 0x0001,
FEATURE_SSSE3 = 0x0002,
FEATURE_SSE41 = 0x0004,
FEATURE_SSE42 = 0x0008,
FEATURE_AVX = 0x0010,
FEATURE_AVX2 = 0x0020,
FEATURE_FMA3 = 0x0040,
FEATURE_FMA4 = 0x0080,
FEATURE_AVX512F = 0x0100,
FEATURE_AVX512DQ = 0x0200,
FEATURE_AVX512PF = 0x0400,
FEATURE_AVX512ER = 0x0800,
FEATURE_AVX512CD = 0x1000,
FEATURE_AVX512BW = 0x2000,
FEATURE_AVX512VL = 0x4000,
FEATURE_AVX512VNNI = 0x8000,
FEATURE_AVX512BF16 = 0x10000
FEATURE_SSE3 = 0x0001,
FEATURE_SSSE3 = 0x0002,
FEATURE_SSE41 = 0x0004,
FEATURE_SSE42 = 0x0008,
FEATURE_AVX = 0x0010,
FEATURE_AVX2 = 0x0020,
FEATURE_FMA3 = 0x0040,
FEATURE_FMA4 = 0x0080,
FEATURE_AVX512F = 0x0100,
FEATURE_AVX512DQ = 0x0200,
FEATURE_AVX512PF = 0x0400,
FEATURE_AVX512ER = 0x0800,
FEATURE_AVX512CD = 0x1000,
FEATURE_AVX512BW = 0x2000,
FEATURE_AVX512VL = 0x4000,
FEATURE_AVX512VNNI = 0x8000,
FEATURE_AVX512BF16 = 0x10000,
FEATURE_AVXVNNI = 0x20000,
FEATURE_AVX512VP2INTERSECT = 0x40000,
FEATURE_MOVDIRI = 0x80000,
FEATURE_MOVDIR64B = 0x100000
};
#elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM)

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -150,6 +150,10 @@ gint_t bli_env_get_var_arch_type( const char* env, gint_t fallback )
r_val = BLIS_ARCH_PENRYN;
}
// AMD
else if (strcmp(str, "zen5") == 0)
{
r_val = BLIS_ARCH_ZEN4;
}
else if (strcmp(str, "zen4") == 0)
{
r_val = BLIS_ARCH_ZEN4;
@@ -313,7 +317,17 @@ gint_t bli_env_get_var_model_type( const char* env, gint_t fallback )
str[i] = tolower(str[i]);
}
// AMD
if (strcmp(str, "genoa") == 0)
if (strcmp(str, "turin") == 0)
{
r_val = BLIS_MODEL_TURIN;
}
else if ((strcmp(str, "turin_dense") == 0) ||
(strcmp(str, "turin-dense") == 0) ||
(strcmp(str, "turindense") == 0))
{
r_val = BLIS_MODEL_TURIN_DENSE;
}
else if (strcmp(str, "genoa") == 0)
{
r_val = BLIS_MODEL_GENOA;
}

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -107,6 +107,11 @@ void bli_gks_init( void )
#endif
// AMD architectures
#ifdef BLIS_CONFIG_ZEN5
bli_gks_register_cntx( BLIS_ARCH_ZEN4, bli_cntx_init_zen4,
bli_cntx_init_zen4_ref,
bli_cntx_init_zen4_ind );
#endif
#ifdef BLIS_CONFIG_ZEN4
bli_gks_register_cntx( BLIS_ARCH_ZEN4, bli_cntx_init_zen4,
bli_cntx_init_zen4_ref,

View File

@@ -6,7 +6,7 @@
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2016, Hewlett Packard Enterprise Development LP
Copyright (C) 2019 - 2022, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -62,6 +62,9 @@ CNTX_INIT_PROTS( penryn )
#endif
// -- AMD64 architectures --
#ifdef BLIS_CONFIG_ZEN5
CNTX_INIT_PROTS( zen5 )
#endif
#ifdef BLIS_CONFIG_ZEN4
CNTX_INIT_PROTS( zen4 )
#endif
@@ -177,6 +180,9 @@ CNTX_INIT_PROTS( generic )
// -- AMD64 architectures --
#ifdef BLIS_FAMILY_ZEN5
#include "bli_family_zen5.h"
#endif
#ifdef BLIS_FAMILY_ZEN4
#include "bli_family_zen4.h"
#endif

View File

@@ -6,7 +6,7 @@
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2016, Hewlett Packard Enterprise Development LP
Copyright (C) 2021 - 2023, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2021 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -1023,6 +1023,7 @@ typedef enum
BLIS_ARCH_PENRYN,
// AMD
BLIS_ARCH_ZEN5,
BLIS_ARCH_ZEN4,
BLIS_ARCH_ZEN3,
BLIS_ARCH_ZEN2,
@@ -1063,6 +1064,10 @@ typedef enum
// Default model
BLIS_MODEL_DEFAULT,
// AMD Zen5
BLIS_MODEL_TURIN,
BLIS_MODEL_TURIN_DENSE,
// AMD Zen4
BLIS_MODEL_GENOA,
BLIS_MODEL_BERGAMO,