/*
 * Patch overview: initial AMD Zen5 (Turin / Turin Dense) enablement, routed
 * through the existing zen4 code path. Everything below the first
 * "diff --git" header is the unmodified patch text.
 *
 * config/zen4/bli_cntx_init_zen4.c
 *   - Adds BLI_CNTX_DEFAULT_BLKSZ_LIST_TURIN (values copied from Genoa) and
 *     BLI_CNTX_DEFAULT_BLKSZ_LIST_TURIN_DENSE (values copied from Bergamo).
 *   - Removes the trailing line-continuation backslash after the final
 *     BLIS_DF statement of the two pre-existing block-size macros.
 *   - bli_cntx_init_zen4() now tests bli_init_model_query_id() for
 *     BLIS_MODEL_TURIN_DENSE, then BLIS_MODEL_TURIN, before the existing
 *     BLIS_MODEL_BERGAMO test.
 *
 * frame/base/bli_arch.c
 *   - Adds "zen5" to config_name[] and "Turin" / "Turin Dense" to
 *     model_name[], in the same positions as the new enum entries.
 *
 * frame/base/bli_check.c
 *   - bli_check_valid_model_id(): for BLIS_ARCH_ZEN5 accepts the ranges
 *     TURIN..TURIN_DENSE and GENOA..GENOA_X; for BLIS_ARCH_ZEN4 the accepted
 *     range is widened to TURIN..GENOA_X.
 *
 * frame/base/bli_cpuid.c, frame/base/bli_cpuid.h
 *   - New bli_cpuid_is_zen5(): requires the listed feature bits and
 *     family == 0x1A.
 *   - New bli_cpuid_get_zen5_cpuid_model(): defaults to BLIS_MODEL_TURIN and
 *     returns BLIS_MODEL_TURIN_DENSE for family 0x1A, models 0x10..0x1f.
 *   - bli_cpuid_query_id(): under BLIS_CONFIG_ZEN4, hardware matching
 *     bli_cpuid_is_zen5() is reported as BLIS_ARCH_ZEN4.
 *   - bli_cpuid_query_model_id(): adds a BLIS_CONFIG_ZEN5 branch; the
 *     BLIS_CONFIG_ZEN4 branch now dispatches on family (0x19 -> zen4 model
 *     lookup, 0x1A -> zen5 model lookup).
 *   - bli_cpuid_get_zen4_cpuid_model(): adds an else branch that selects
 *     BLIS_MODEL_GENOA_X when bli_cpuid_query_l3_cache_size() > 393216.
 *   - New feature masks/flags: MOVDIRI, MOVDIR64B, AVX512VP2INTERSECT,
 *     AVXVNNI, with the matching tests added to bli_cpuid_query().
 *
 * frame/base/bli_env.c
 *   - Arch string "zen5" maps to BLIS_ARCH_ZEN4.
 *   - Model strings "turin" and "turin_dense" / "turin-dense" / "turindense"
 *     map to BLIS_MODEL_TURIN and BLIS_MODEL_TURIN_DENSE.
 *
 * frame/base/bli_gks.c
 *   - Under BLIS_CONFIG_ZEN5, registers the zen4 context init functions for
 *     BLIS_ARCH_ZEN4.
 *
 * frame/include/bli_arch_config.h
 *   - Adds CNTX_INIT_PROTS( zen5 ) under BLIS_CONFIG_ZEN5 and an include of
 *     bli_family_zen5.h under BLIS_FAMILY_ZEN5.
 *
 * frame/include/bli_type_defs.h
 *   - Adds BLIS_ARCH_ZEN5 ahead of BLIS_ARCH_ZEN4, and BLIS_MODEL_TURIN /
 *     BLIS_MODEL_TURIN_DENSE ahead of BLIS_MODEL_GENOA.
 *
 * NOTE(review): if BLIS_CONFIG_ZEN5 and BLIS_CONFIG_ZEN4 are both defined,
 *   bli_gks_init() calls bli_gks_register_cntx( BLIS_ARCH_ZEN4, ... ) twice
 *   with the same arguments -- confirm bli_gks_register_cntx tolerates a
 *   repeated registration (its body is not part of this patch).
 * NOTE(review): in the BLIS_CONFIG_ZEN4 branch of bli_cpuid_query_model_id(),
 *   a family other than 0x19 / 0x1A now leaves cpuid_model at
 *   BLIS_MODEL_DEFAULT, whereas the removed code always called
 *   bli_cpuid_get_zen4_cpuid_model() -- confirm this is intended for CPUs
 *   that reach BLIS_ARCH_ZEN4 through the "future AMD processors" fallback.
 * NOTE(review): the unit of the 393216 L3 threshold is not visible here --
 *   presumably whatever bli_cpuid_query_l3_cache_size() returns; verify.
 */
diff --git a/config/zen4/bli_cntx_init_zen4.c b/config/zen4/bli_cntx_init_zen4.c index 8a79ff8a1..7a3ed237c 100644 --- a/config/zen4/bli_cntx_init_zen4.c +++ b/config/zen4/bli_cntx_init_zen4.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2022 - 2023, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -49,8 +49,7 @@ bli_blksz_init_easy( &blkszs[ BLIS_NC ], 6144, 4002, 4080, 2004 ); \ \ bli_blksz_init_easy( &blkszs[ BLIS_AF ], 5, 5, -1, -1 ); \ - bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); \ - + bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); #define BLI_CNTX_DEFAULT_BLKSZ_LIST_BERGAMO(blkszs) \ /* s d c z */ \ @@ -62,8 +61,33 @@ bli_blksz_init_easy( &blkszs[ BLIS_NC ], 6144, 3600, 4080, 2004 ); \ \ bli_blksz_init_easy( &blkszs[ BLIS_AF ], 5, 5, -1, -1 ); \ - bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); \ + bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); +/* Starting point for Turin, copied from Genoa */ +#define BLI_CNTX_DEFAULT_BLKSZ_LIST_TURIN(blkszs) \ + /* s d c z */ \ + bli_blksz_init_easy( &blkszs[ BLIS_MR ], 32, 32, 3, 12 ); \ + bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 6, 8, 4 ); \ + bli_blksz_init_easy( &blkszs[ BLIS_MC ], 512, 128, 144, 60 ); \ + bli_blksz_init ( &blkszs[ BLIS_KC ], 480, 512, 256, 512, \ + 480, 320, 256, 160 ); \ + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 6144, 4002, 4080, 2004 ); \ + \ + bli_blksz_init_easy( &blkszs[ BLIS_AF ], 5, 5, -1, -1 ); \ + bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); + +/* Starting point for Turin Dense, copied from Bergamo */ +#define BLI_CNTX_DEFAULT_BLKSZ_LIST_TURIN_DENSE(blkszs) \ + /* s d c z */ \ + bli_blksz_init_easy( &blkszs[ BLIS_MR ], 32, 32, 3, 12 ); \ + 
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 6, 8, 4 ); \ + bli_blksz_init_easy( &blkszs[ BLIS_MC ], 512, 64, 144, 60 ); \ + bli_blksz_init ( &blkszs[ BLIS_KC ], 480, 512, 256, 512, \ + 480, 320, 256, 160 ); \ + bli_blksz_init_easy( &blkszs[ BLIS_NC ], 6144, 3600, 4080, 2004 ); \ + \ + bli_blksz_init_easy( &blkszs[ BLIS_AF ], 5, 5, -1, -1 ); \ + bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); void bli_cntx_init_zen4( cntx_t* cntx ) { @@ -212,7 +236,15 @@ void bli_cntx_init_zen4( cntx_t* cntx ) // These are reference block sizes and may be overridden based on // number of threads used at runtime. - if ( bli_init_model_query_id() == BLIS_MODEL_BERGAMO ) + if ( bli_init_model_query_id() == BLIS_MODEL_TURIN_DENSE ) + { + BLI_CNTX_DEFAULT_BLKSZ_LIST_TURIN_DENSE(blkszs); + } + else if ( bli_init_model_query_id() == BLIS_MODEL_TURIN ) + { + BLI_CNTX_DEFAULT_BLKSZ_LIST_TURIN(blkszs); + } + else if ( bli_init_model_query_id() == BLIS_MODEL_BERGAMO ) { BLI_CNTX_DEFAULT_BLKSZ_LIST_BERGAMO(blkszs); } diff --git a/frame/base/bli_arch.c b/frame/base/bli_arch.c index e4d4edfba..bcbd4c9f5 100644 --- a/frame/base/bli_arch.c +++ b/frame/base/bli_arch.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -569,6 +569,7 @@ static char* config_name[ BLIS_NUM_ARCHS ] = "sandybridge", "penryn", + "zen5", "zen4", "zen3", "zen2", @@ -609,6 +610,9 @@ static char* model_name[ BLIS_NUM_MODELS ] = "default", + "Turin", + "Turin Dense", + "Genoa", "Bergamo", "Genoa-X", diff --git a/frame/base/bli_check.c b/frame/base/bli_check.c index a7c3d194b..fbe4bce91 100644 --- a/frame/base/bli_check.c +++ b/frame/base/bli_check.c @@ -5,7 +5,7 @@ libraries. 
Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -898,9 +898,22 @@ err_t bli_check_valid_model_id( arch_t arch_id, model_t model_id ) // Model ranges are specified in bli_type_defs.h err_t e_val = BLIS_INVALID_MODEL_ID; + if ( arch_id == BLIS_ARCH_ZEN5 ) + { + if ( ( gint_t )model_id >= BLIS_MODEL_TURIN && + ( gint_t )model_id <= BLIS_MODEL_TURIN_DENSE ) + { + e_val = BLIS_SUCCESS; + } + if ( ( gint_t )model_id >= BLIS_MODEL_GENOA && + ( gint_t )model_id <= BLIS_MODEL_GENOA_X ) + { + e_val = BLIS_SUCCESS; + } + } if ( arch_id == BLIS_ARCH_ZEN4 ) { - if ( ( gint_t )model_id >= BLIS_MODEL_GENOA && + if ( ( gint_t )model_id >= BLIS_MODEL_TURIN && ( gint_t )model_id <= BLIS_MODEL_GENOA_X ) { e_val = BLIS_SUCCESS; diff --git a/frame/base/bli_cpuid.c b/frame/base/bli_cpuid.c index d54c6a8bb..ac4c2508f 100644 --- a/frame/base/bli_cpuid.c +++ b/frame/base/bli_cpuid.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved. Copyright (C) 2019, Dave Love, University of Manchester Redistribution and use in source and binary forms, with or without @@ -185,10 +185,11 @@ arch_t bli_cpuid_query_id( void ) } else if ( vendor == VENDOR_AMD ) { - // Check for each AMD configuration that is enabled, check for that // microarchitecture. We check from most recent to most dated. 
#ifdef BLIS_CONFIG_ZEN4 + if ( bli_cpuid_is_zen5( family, model, features ) ) + return BLIS_ARCH_ZEN4; if ( bli_cpuid_is_zen4( family, model, features ) ) return BLIS_ARCH_ZEN4; // Fallback test for future AMD processors @@ -240,6 +241,22 @@ model_t bli_cpuid_query_model_id( arch_t arch_id ) // Set default for architectures where separate models haven't been defined. model_t cpuid_model = BLIS_MODEL_DEFAULT; +#ifdef BLIS_CONFIG_ZEN5 + if (arch_id == BLIS_ARCH_ZEN5) + { + // Call the CPUID instruction and parse its results into a family id, + // model id, and a feature bit field. The return value encodes the + // vendor. + + uint32_t __attribute__ ((unused)) vendor; + uint32_t family, model, features; + + vendor = bli_cpuid_query( &family, &model, &features ); + + // Check CPU model. + cpuid_model = bli_cpuid_get_zen5_cpuid_model( family, model, features ); + } +#endif #ifdef BLIS_CONFIG_ZEN4 if (arch_id == BLIS_ARCH_ZEN4) { @@ -252,8 +269,16 @@ model_t bli_cpuid_query_model_id( arch_t arch_id ) vendor = bli_cpuid_query( &family, &model, &features ); - // Check CPU model. - cpuid_model = bli_cpuid_get_zen4_cpuid_model( family, model, features ); + // For now, zen4 code path is also used for zen5 so check + // for zen5 models here too. + if ( family == 0x19 ) + { + cpuid_model = bli_cpuid_get_zen4_cpuid_model( family, model, features ); + } + else if ( family == 0x1A ) + { + cpuid_model = bli_cpuid_get_zen5_cpuid_model( family, model, features ); + } } #endif #ifdef BLIS_CONFIG_ZEN3 @@ -386,6 +411,60 @@ bool bli_cpuid_is_penryn } // ----------------------------------------------------------------------------- +bool bli_cpuid_is_zen5 + ( + uint32_t family, + uint32_t model, + uint32_t features + ) +{ + // Check for expected CPU features. 
+ const uint32_t expected = FEATURE_SSE3 | + FEATURE_SSSE3 | + FEATURE_SSE41 | + FEATURE_SSE42 | + FEATURE_AVX | + FEATURE_FMA3 | + FEATURE_AVX2 | + FEATURE_AVX512F | + FEATURE_AVX512DQ | + FEATURE_AVX512CD | + FEATURE_AVX512BW | + FEATURE_AVX512VL | + FEATURE_AVX512VNNI | + FEATURE_AVX512BF16 | + FEATURE_MOVDIRI | + FEATURE_MOVDIR64B | + FEATURE_AVX512VP2INTERSECT | + FEATURE_AVXVNNI; + + if ( !bli_cpuid_has_features( features, expected ) ) return FALSE; + + // For zen5 the family id is 0x1A + if ( family != 0x1A ) return FALSE; + + return TRUE; +} +model_t bli_cpuid_get_zen5_cpuid_model + ( + uint32_t family, + uint32_t model, + uint32_t features + ) +{ + // Look at model of CPU and set cpuid_model appropriately. + // For Zen5, the default is Turin. + model_t cpuid_model = BLIS_MODEL_TURIN; + if ( family == 0x1A ) + { + if ( 0x10 <= model && model <= 0x1f ) // Turin Dense + { + cpuid_model = BLIS_MODEL_TURIN_DENSE; + } + } + return cpuid_model; +} + bool bli_cpuid_is_zen4 ( uint32_t family, @@ -438,6 +517,14 @@ model_t bli_cpuid_get_zen4_cpuid_model { cpuid_model = BLIS_MODEL_BERGAMO; } + else + { + uint32_t l3_cache_size = bli_cpuid_query_l3_cache_size(); + if ( l3_cache_size > 393216 ) + { + cpuid_model = BLIS_MODEL_GENOA_X; + } + } } return cpuid_model; } @@ -1060,7 +1147,7 @@ model_t bli_cpuid_query_model_id( arch_t arch_id ) Copyright (C) 2017, The University of Texas at Austin Copyright (C) 2017, Devin Matthews - Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -1092,29 +1179,33 @@ model_t bli_cpuid_query_model_id( arch_t arch_id ) enum { - // input register(s) output register - FEATURE_MASK_SSE3 = (1u<< 0), // cpuid[eax=1] :ecx[0] - FEATURE_MASK_SSSE3 = (1u<< 9), // cpuid[eax=1] :ecx[9] - FEATURE_MASK_SSE41 = (1u<<19), // cpuid[eax=1] :ecx[19] - FEATURE_MASK_SSE42 = (1u<<20), // cpuid[eax=1] :ecx[20] - FEATURE_MASK_AVX = (1u<<28), // cpuid[eax=1] :ecx[28] - FEATURE_MASK_AVX2 = (1u<< 5), // cpuid[eax=7,ecx=0] :ebx[5] - FEATURE_MASK_FMA3 = (1u<<12), // cpuid[eax=1] :ecx[12] - FEATURE_MASK_FMA4 = (1u<<16), // cpuid[eax=0x80000001]:ecx[16] - FEATURE_MASK_AVX512F = (1u<<16), // cpuid[eax=7,ecx=0] :ebx[16] - FEATURE_MASK_AVX512DQ = (1u<<17), // cpuid[eax=7,ecx=0] :ebx[17] - FEATURE_MASK_AVX512PF = (1u<<26), // cpuid[eax=7,ecx=0] :ebx[26] - FEATURE_MASK_AVX512ER = (1u<<27), // cpuid[eax=7,ecx=0] :ebx[27] - FEATURE_MASK_AVX512CD = (1u<<28), // cpuid[eax=7,ecx=0] :ebx[28] - FEATURE_MASK_AVX512BW = (1u<<30), // cpuid[eax=7,ecx=0] :ebx[30] - FEATURE_MASK_AVX512VL = (1u<<31), // cpuid[eax=7,ecx=0] :ebx[31] - FEATURE_MASK_AVX512VNNI = (1u<<11), // cpuid[eax=7,ecx=0] :ecx[11] - FEATURE_MASK_AVX512BF16 = (1u<< 5), // cpuid[eax=7,ecx=1] :eax[5] - FEATURE_MASK_XGETBV = (1u<<26)| - (1u<<27), // cpuid[eax=1] :ecx[27:26] - XGETBV_MASK_XMM = 0x02u, // xcr0[1] - XGETBV_MASK_YMM = 0x04u, // xcr0[2] - XGETBV_MASK_ZMM = 0xe0u // xcr0[7:5] + // input register(s) output register + FEATURE_MASK_SSE3 = (1u<< 0), // cpuid[eax=1] :ecx[0] + FEATURE_MASK_SSSE3 = (1u<< 9), // cpuid[eax=1] :ecx[9] + FEATURE_MASK_SSE41 = (1u<<19), // cpuid[eax=1] :ecx[19] + FEATURE_MASK_SSE42 = (1u<<20), // cpuid[eax=1] :ecx[20] + FEATURE_MASK_AVX = (1u<<28), // cpuid[eax=1] :ecx[28] + FEATURE_MASK_AVX2 = (1u<< 5), // cpuid[eax=7,ecx=0] :ebx[5] + FEATURE_MASK_FMA3 = (1u<<12), // cpuid[eax=1] :ecx[12] + FEATURE_MASK_FMA4 = (1u<<16), // 
cpuid[eax=0x80000001] :ecx[16] + FEATURE_MASK_AVX512F = (1u<<16), // cpuid[eax=7,ecx=0] :ebx[16] + FEATURE_MASK_AVX512DQ = (1u<<17), // cpuid[eax=7,ecx=0] :ebx[17] + FEATURE_MASK_AVX512PF = (1u<<26), // cpuid[eax=7,ecx=0] :ebx[26] + FEATURE_MASK_AVX512ER = (1u<<27), // cpuid[eax=7,ecx=0] :ebx[27] + FEATURE_MASK_AVX512CD = (1u<<28), // cpuid[eax=7,ecx=0] :ebx[28] + FEATURE_MASK_AVX512BW = (1u<<30), // cpuid[eax=7,ecx=0] :ebx[30] + FEATURE_MASK_AVX512VL = (1u<<31), // cpuid[eax=7,ecx=0] :ebx[31] + FEATURE_MASK_AVX512VNNI = (1u<<11), // cpuid[eax=7,ecx=0] :ecx[11] + FEATURE_MASK_MOVDIRI = (1u<<27), // cpuid[eax=7,ecx=0] :ecx[27] + FEATURE_MASK_MOVDIR64B = (1u<<28), // cpuid[eax=7,ecx=0] :ecx[28] + FEATURE_MASK_AVX512VP2INTERSECT = (1u<<8), // cpuid[eax=7,ecx=0] :edx[8] + FEATURE_MASK_AVXVNNI = (1u<< 4), // cpuid[eax=7,ecx=1] :eax[4] + FEATURE_MASK_AVX512BF16 = (1u<< 5), // cpuid[eax=7,ecx=1] :eax[5] + FEATURE_MASK_XGETBV = (1u<<26)| + (1u<<27), // cpuid[eax=1] :ecx[27:26] + XGETBV_MASK_XMM = 0x02u, // xcr0[1] + XGETBV_MASK_YMM = 0x04u, // xcr0[2] + XGETBV_MASK_ZMM = 0xe0u // xcr0[7:5] }; @@ -1178,6 +1269,10 @@ uint32_t bli_cpuid_query if ( bli_cpuid_has_features( ebx, FEATURE_MASK_AVX512VL ) ) *features |= FEATURE_AVX512VL; if ( bli_cpuid_has_features( ecx, FEATURE_MASK_AVX512VNNI ) ) *features |= FEATURE_AVX512VNNI; + if ( bli_cpuid_has_features( ecx, FEATURE_MASK_MOVDIRI ) ) *features |= FEATURE_MOVDIRI; + if ( bli_cpuid_has_features( ecx, FEATURE_MASK_MOVDIR64B ) ) *features |= FEATURE_MOVDIR64B; + + if ( bli_cpuid_has_features( edx, FEATURE_MASK_AVX512VP2INTERSECT ) ) *features |= FEATURE_AVX512VP2INTERSECT; // This is actually a macro that modifies the last four operands, // hence why they are not passed by address. 
@@ -1186,6 +1281,7 @@ uint32_t bli_cpuid_query // 5th feature bit of the returned value __cpuid_count( 7, 1, eax, ebx, ecx, edx ); + if ( bli_cpuid_has_features( eax, FEATURE_MASK_AVXVNNI ) ) *features |= FEATURE_AVXVNNI; if ( bli_cpuid_has_features( eax, FEATURE_MASK_AVX512BF16 ) ) *features |= FEATURE_AVX512BF16; } @@ -1306,8 +1402,8 @@ uint32_t bli_cpuid_query // only if the xcr[7:5] bits are set. If they are not set, then // clear all feature bits related to AVX-512. if ( !bli_cpuid_has_features( eax, XGETBV_MASK_XMM | - XGETBV_MASK_YMM | - XGETBV_MASK_ZMM ) ) + XGETBV_MASK_YMM | + XGETBV_MASK_ZMM ) ) { *features &= ~( FEATURE_AVX512F | FEATURE_AVX512DQ | @@ -1322,7 +1418,7 @@ uint32_t bli_cpuid_query // only if the xcr[2] bit is set. If it is not set, then // clear all feature bits related to AVX. if ( !bli_cpuid_has_features( eax, XGETBV_MASK_XMM | - XGETBV_MASK_YMM ) ) + XGETBV_MASK_YMM ) ) { *features &= ~( FEATURE_AVX | FEATURE_AVX2 | diff --git a/frame/base/bli_cpuid.h b/frame/base/bli_cpuid.h index 5b5229758..fff30896f 100644 --- a/frame/base/bli_cpuid.h +++ b/frame/base/bli_cpuid.h @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -68,6 +68,7 @@ bool bli_cpuid_is_sandybridge( uint32_t family, uint32_t model, uint32_t feature bool bli_cpuid_is_penryn( uint32_t family, uint32_t model, uint32_t features ); // AMD +bool bli_cpuid_is_zen5( uint32_t family, uint32_t model, uint32_t features ); bool bli_cpuid_is_zen4( uint32_t family, uint32_t model, uint32_t features ); bool bli_cpuid_is_avx512_fallback( uint32_t family, uint32_t model, uint32_t features ); bool bli_cpuid_is_zen3( uint32_t family, uint32_t model, uint32_t features ); @@ -78,6 +79,7 @@ bool bli_cpuid_is_steamroller( uint32_t family, uint32_t model, uint32_t feature bool bli_cpuid_is_piledriver( uint32_t family, uint32_t model, uint32_t features ); bool bli_cpuid_is_bulldozer( uint32_t family, uint32_t model, uint32_t features ); +model_t bli_cpuid_get_zen5_cpuid_model( uint32_t family, uint32_t model, uint32_t features ); model_t bli_cpuid_get_zen4_cpuid_model( uint32_t family, uint32_t model, uint32_t features ); model_t bli_cpuid_get_zen3_cpuid_model( uint32_t family, uint32_t model, uint32_t features ); @@ -167,23 +169,27 @@ enum }; enum { - FEATURE_SSE3 = 0x0001, - FEATURE_SSSE3 = 0x0002, - FEATURE_SSE41 = 0x0004, - FEATURE_SSE42 = 0x0008, - FEATURE_AVX = 0x0010, - FEATURE_AVX2 = 0x0020, - FEATURE_FMA3 = 0x0040, - FEATURE_FMA4 = 0x0080, - FEATURE_AVX512F = 0x0100, - FEATURE_AVX512DQ = 0x0200, - FEATURE_AVX512PF = 0x0400, - FEATURE_AVX512ER = 0x0800, - FEATURE_AVX512CD = 0x1000, - FEATURE_AVX512BW = 0x2000, - FEATURE_AVX512VL = 0x4000, - FEATURE_AVX512VNNI = 0x8000, - FEATURE_AVX512BF16 = 0x10000 + FEATURE_SSE3 = 0x0001, + FEATURE_SSSE3 = 0x0002, + FEATURE_SSE41 = 0x0004, + FEATURE_SSE42 = 0x0008, + FEATURE_AVX = 0x0010, + FEATURE_AVX2 = 0x0020, + FEATURE_FMA3 = 0x0040, + FEATURE_FMA4 = 0x0080, + FEATURE_AVX512F = 0x0100, + FEATURE_AVX512DQ = 0x0200, + FEATURE_AVX512PF = 0x0400, + 
FEATURE_AVX512ER = 0x0800, + FEATURE_AVX512CD = 0x1000, + FEATURE_AVX512BW = 0x2000, + FEATURE_AVX512VL = 0x4000, + FEATURE_AVX512VNNI = 0x8000, + FEATURE_AVX512BF16 = 0x10000, + FEATURE_AVXVNNI = 0x20000, + FEATURE_AVX512VP2INTERSECT = 0x40000, + FEATURE_MOVDIRI = 0x80000, + FEATURE_MOVDIR64B = 0x100000 }; #elif defined(__aarch64__) || defined(__arm__) || defined(_M_ARM) diff --git a/frame/base/bli_env.c b/frame/base/bli_env.c index 229aae258..a290e84fc 100644 --- a/frame/base/bli_env.c +++ b/frame/base/bli_env.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -150,6 +150,10 @@ gint_t bli_env_get_var_arch_type( const char* env, gint_t fallback ) r_val = BLIS_ARCH_PENRYN; } // AMD + else if (strcmp(str, "zen5") == 0) + { + r_val = BLIS_ARCH_ZEN4; + } else if (strcmp(str, "zen4") == 0) { r_val = BLIS_ARCH_ZEN4; @@ -313,7 +317,17 @@ gint_t bli_env_get_var_model_type( const char* env, gint_t fallback ) str[i] = tolower(str[i]); } // AMD - if (strcmp(str, "genoa") == 0) + if (strcmp(str, "turin") == 0) + { + r_val = BLIS_MODEL_TURIN; + } + else if ((strcmp(str, "turin_dense") == 0) || + (strcmp(str, "turin-dense") == 0) || + (strcmp(str, "turindense") == 0)) + { + r_val = BLIS_MODEL_TURIN_DENSE; + } + else if (strcmp(str, "genoa") == 0) { r_val = BLIS_MODEL_GENOA; } diff --git a/frame/base/bli_gks.c b/frame/base/bli_gks.c index 321d72555..2e584f1ec 100644 --- a/frame/base/bli_gks.c +++ b/frame/base/bli_gks.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -107,6 +107,11 @@ void bli_gks_init( void ) #endif // AMD architectures +#ifdef BLIS_CONFIG_ZEN5 + bli_gks_register_cntx( BLIS_ARCH_ZEN4, bli_cntx_init_zen4, + bli_cntx_init_zen4_ref, + bli_cntx_init_zen4_ind ); +#endif #ifdef BLIS_CONFIG_ZEN4 bli_gks_register_cntx( BLIS_ARCH_ZEN4, bli_cntx_init_zen4, bli_cntx_init_zen4_ref, diff --git a/frame/include/bli_arch_config.h b/frame/include/bli_arch_config.h index 0b0107efa..862510dd6 100644 --- a/frame/include/bli_arch_config.h +++ b/frame/include/bli_arch_config.h @@ -6,7 +6,7 @@ Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP - Copyright (C) 2019 - 2022, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2024, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -62,6 +62,9 @@ CNTX_INIT_PROTS( penryn ) #endif // -- AMD64 architectures -- +#ifdef BLIS_CONFIG_ZEN5 +CNTX_INIT_PROTS( zen5 ) +#endif #ifdef BLIS_CONFIG_ZEN4 CNTX_INIT_PROTS( zen4 ) #endif @@ -177,6 +180,9 @@ CNTX_INIT_PROTS( generic ) // -- AMD64 architectures -- +#ifdef BLIS_FAMILY_ZEN5 +#include "bli_family_zen5.h" +#endif #ifdef BLIS_FAMILY_ZEN4 #include "bli_family_zen4.h" #endif diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index e3355e843..e4e782aac 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -6,7 +6,7 @@ Copyright (C) 2014, The University of Texas at Austin Copyright (C) 2016, Hewlett Packard Enterprise Development LP - Copyright (C) 2021 - 2023, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2021 - 2024, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -1023,6 +1023,7 @@ typedef enum BLIS_ARCH_PENRYN, // AMD + BLIS_ARCH_ZEN5, BLIS_ARCH_ZEN4, BLIS_ARCH_ZEN3, BLIS_ARCH_ZEN2, @@ -1063,6 +1064,10 @@ typedef enum // Default model BLIS_MODEL_DEFAULT, + // AMD Zen5 + BLIS_MODEL_TURIN, + BLIS_MODEL_TURIN_DENSE, + // AMD Zen4 BLIS_MODEL_GENOA, BLIS_MODEL_BERGAMO,