From 8310b2d5d34ca78e3b044248252668bdd807d3a8 Mon Sep 17 00:00:00 2001
From: "Smyth, Edward"
Date: Wed, 4 Feb 2026 13:16:46 +0000
Subject: [PATCH] Optimize bli_arch_query_id and related functions

bli_arch_query_id() is used to select kernels in optimized BLAS APIs.
The previous implementation incurred the overhead of multiple function
calls. This has been reduced by:
- Changing the function to be defined in a header file so it can be
  inlined.
- Avoiding the call to bli_arch_check_id_once, which was a wrapper for a
  call to bli_pthread_once. Instead, bli_pthread_once is called directly.
- For builds with a single BLIS sub-configuration, taking the correct
  arch_id directly from a header file in the corresponding config
  subdirectory, avoiding the bli_pthread_once call and making the value
  explicit at compile time, which may enable additional optimizations.

To enable these changes, the variables arch_id and model_id defined in
frame/base/bli_arch.c are no longer static, as they must be accessed in
multiple files (i.e. they are now global variables). Rename them to
g_arch_id and g_model_id to distinguish them from any locally defined
arch_id or model_id variables.
--- addon/aocl_gemm/aocl_gemm_bf16bf16f32obf16.c | 4 +- addon/aocl_gemm/aocl_gemm_bf16bf16f32of32.c | 4 +- addon/aocl_gemm/config/lpgemm_config.c | 4 +- config/a64fx/bli_family_a64fx.h | 2 + config/arm32/bli_family_arm32.h | 1 - config/armsve/bli_family_armsve.h | 2 + config/bgq/bli_family_bgq.h | 2 + config/bulldozer/bli_family_bulldozer.h | 2 + config/cortexa15/bli_family_cortexa15.h | 2 + config/cortexa53/bli_family_cortexa53.h | 2 + config/cortexa57/bli_family_cortexa57.h | 2 + config/cortexa9/bli_family_cortexa9.h | 2 + config/excavator/bli_family_excavator.h | 2 + config/firestorm/bli_family_firestorm.h | 2 + config/generic/bli_family_generic.h | 4 +- config/haswell/bli_family_haswell.h | 3 +- config/knc/bli_family_knc.h | 2 + config/knl/bli_family_knl.h | 2 + config/penryn/bli_family_penryn.h | 2 + config/piledriver/bli_family_piledriver.h | 2 + config/power10/bli_family_power10.h | 3 + config/power7/bli_family_power7.h | 2 + config/power9/bli_family_power9.h | 3 + config/sandybridge/bli_family_sandybridge.h | 2 + config/skx/bli_family_skx.h | 3 + config/steamroller/bli_family_steamroller.h | 2 + config/thunderx2/bli_family_thunderx2.h | 2 + config/zen/bli_family_zen.h | 4 +- config/zen2/bli_family_zen2.h | 8 +- config/zen3/bli_family_zen3.h | 8 +- config/zen4/bli_family_zen4.h | 8 +- config/zen5/bli_family_zen5.h | 8 +- frame/1m/packm/bli_packm_blk_var1.c | 4 +- frame/2/gemv/bli_gemv_unf_var1_amd.c | 6 +- frame/2/gemv/bli_gemv_unf_var2_amd.c | 6 +- frame/2/trsv/bli_trsv_unf_var1_amd.c | 4 +- frame/2/trsv/bli_trsv_unf_var2_amd.c | 4 +- frame/3/bli_l3_smart_threading.c | 4 +- frame/3/bli_l3_sup.c | 4 +- frame/3/bli_l3_sup_int_amd.c | 8 +- frame/3/gemm/bli_gemm_front_amd.c | 6 +- frame/3/gemm/bli_gemm_ker_var2.c | 4 +- frame/3/gemm/bli_tiny_gemm_amd.c | 4 +- frame/base/bli_arch.c | 207 +++++-------------- frame/base/bli_arch.h | 81 +++++++- frame/base/bli_gks.c | 18 +- frame/base/bli_rntm.c | 10 +- frame/compat/bla_amax_amd.c | 4 +- frame/compat/bla_axpy_amd.c 
| 8 +- frame/compat/bla_copy_amd.c | 8 +- frame/compat/bla_dot_amd.c | 10 +- frame/compat/bla_gemm_amd.c | 24 +-- frame/compat/bla_hemm.c | 4 +- frame/compat/bla_her2k.c | 4 +- frame/compat/bla_herk.c | 4 +- frame/compat/bla_scal_amd.c | 12 +- frame/compat/bla_symm.c | 4 +- frame/compat/bla_syr2k.c | 4 +- frame/compat/bla_syrk.c | 4 +- frame/compat/bla_trmm.c | 4 +- frame/compat/bla_trsm_amd.c | 16 +- frame/compat/extra/bla_axpby_amd.c | 10 +- frame/compat/extra/bla_gemmt.c | 4 +- frame/util/bli_util_unb_var1.c | 10 +- kernels/zen4/1f/bli_axpyf_zen4_int.c | 4 +- kernels/zen4/2/bli_gemv_n_zen4_int.c | 10 +- 66 files changed, 318 insertions(+), 301 deletions(-) diff --git a/addon/aocl_gemm/aocl_gemm_bf16bf16f32obf16.c b/addon/aocl_gemm/aocl_gemm_bf16bf16f32obf16.c index 58ce1e62f..00fa019b4 100644 --- a/addon/aocl_gemm/aocl_gemm_bf16bf16f32obf16.c +++ b/addon/aocl_gemm/aocl_gemm_bf16bf16f32obf16.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -235,7 +235,7 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16) * as the tiny path for BF16->FP32 is not available. Hence the arch_id also has to be * verified here. 
*/ - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if( ( bli_cpuid_is_avx512bf16_supported() == TRUE ) && ( ( arch_id == BLIS_ARCH_ZEN4 ) || ( arch_id == BLIS_ARCH_ZEN5 ) ) && ( is_single_thread( &rntm_g ) == TRUE) ) { if( ( is_row_major == TRUE ) && diff --git a/addon/aocl_gemm/aocl_gemm_bf16bf16f32of32.c b/addon/aocl_gemm/aocl_gemm_bf16bf16f32of32.c index 111fecdfb..68e1a365b 100644 --- a/addon/aocl_gemm/aocl_gemm_bf16bf16f32of32.c +++ b/addon/aocl_gemm/aocl_gemm_bf16bf16f32of32.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -240,7 +240,7 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32) * as the tiny path for BF16->FP32 is not available. Hence the arch_id also has to be * verified here. */ - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if( ( bli_cpuid_is_avx512bf16_supported() == TRUE ) && ( ( arch_id == BLIS_ARCH_ZEN4 ) || ( arch_id == BLIS_ARCH_ZEN5 ) ) && ( is_tiny_input_bf16of32( m, n, k, lcntx_g ) == TRUE ) && ( is_single_thread( &rntm_g ) == TRUE) && diff --git a/addon/aocl_gemm/config/lpgemm_config.c b/addon/aocl_gemm/config/lpgemm_config.c index ba1575329..9afdeea81 100644 --- a/addon/aocl_gemm/config/lpgemm_config.c +++ b/addon/aocl_gemm/config/lpgemm_config.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -102,7 +102,7 @@ static bli_pthread_once_t once_check_lpgemm_func_map_init = BLIS_PTHREAD_ONCE_IN static void _lpgemm_init_enable_arch() { - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); bool enbl_instr = bli_aocl_enable_instruction_query(); if ( ( enbl_instr == TRUE ) && diff --git a/config/a64fx/bli_family_a64fx.h b/config/a64fx/bli_family_a64fx.h index 5e3f29fd4..7831efef2 100644 --- a/config/a64fx/bli_family_a64fx.h +++ b/config/a64fx/bli_family_a64fx.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_A64FX // -- MEMORY ALLOCATION -------------------------------------------------------- diff --git a/config/arm32/bli_family_arm32.h b/config/arm32/bli_family_arm32.h index 278c22818..1ba4d9007 100644 --- a/config/arm32/bli_family_arm32.h +++ b/config/arm32/bli_family_arm32.h @@ -35,7 +35,6 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H - // -- MEMORY ALLOCATION -------------------------------------------------------- #define BLIS_SIMD_ALIGN_SIZE 16 diff --git a/config/armsve/bli_family_armsve.h b/config/armsve/bli_family_armsve.h index b67ae7c60..e50949154 100644 --- a/config/armsve/bli_family_armsve.h +++ b/config/armsve/bli_family_armsve.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ARMSVE // -- MEMORY ALLOCATION -------------------------------------------------------- diff --git a/config/bgq/bli_family_bgq.h b/config/bgq/bli_family_bgq.h index c8f9f192a..fa454ac4a 100644 --- a/config/bgq/bli_family_bgq.h +++ b/config/bgq/bli_family_bgq.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_BGQ #undef restrict diff --git a/config/bulldozer/bli_family_bulldozer.h b/config/bulldozer/bli_family_bulldozer.h index 43fb19c4e..f48d6cf25 100644 --- a/config/bulldozer/bli_family_bulldozer.h +++ b/config/bulldozer/bli_family_bulldozer.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_BULLDOZER #if 0 diff --git a/config/cortexa15/bli_family_cortexa15.h b/config/cortexa15/bli_family_cortexa15.h index ee5cd6a35..9c44297d8 100644 --- a/config/cortexa15/bli_family_cortexa15.h +++ b/config/cortexa15/bli_family_cortexa15.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_CORTEXA15 // -- MEMORY ALLOCATION -------------------------------------------------------- diff --git a/config/cortexa53/bli_family_cortexa53.h b/config/cortexa53/bli_family_cortexa53.h index e5b50fa07..2620bbd90 100644 --- a/config/cortexa53/bli_family_cortexa53.h +++ b/config/cortexa53/bli_family_cortexa53.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -32,6 +33,7 @@ */ +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_CORTEXA53 // -- MEMORY ALLOCATION -------------------------------------------------------- diff --git a/config/cortexa57/bli_family_cortexa57.h b/config/cortexa57/bli_family_cortexa57.h index 4a60ed2f2..e6416647a 100644 --- a/config/cortexa57/bli_family_cortexa57.h +++ b/config/cortexa57/bli_family_cortexa57.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_CORTEXA57 // -- MEMORY ALLOCATION -------------------------------------------------------- diff --git a/config/cortexa9/bli_family_cortexa9.h b/config/cortexa9/bli_family_cortexa9.h index 95158f435..63c8743ee 100644 --- a/config/cortexa9/bli_family_cortexa9.h +++ b/config/cortexa9/bli_family_cortexa9.h @@ -5,6 +5,7 @@ libraries. 
Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_CORTEXA9 // -- MEMORY ALLOCATION -------------------------------------------------------- diff --git a/config/excavator/bli_family_excavator.h b/config/excavator/bli_family_excavator.h index acef4f319..ed8fbe45f 100644 --- a/config/excavator/bli_family_excavator.h +++ b/config/excavator/bli_family_excavator.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_EXCAVATOR // -- MEMORY ALLOCATION -------------------------------------------------------- diff --git a/config/firestorm/bli_family_firestorm.h b/config/firestorm/bli_family_firestorm.h index 4a60ed2f2..94337cc4f 100644 --- a/config/firestorm/bli_family_firestorm.h +++ b/config/firestorm/bli_family_firestorm.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_FIRESTORM // -- MEMORY ALLOCATION -------------------------------------------------------- diff --git a/config/generic/bli_family_generic.h b/config/generic/bli_family_generic.h index c25a57d9d..3045816a8 100644 --- a/config/generic/bli_family_generic.h +++ b/config/generic/bli_family_generic.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2025 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,8 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H - - +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_GENERIC //#endif diff --git a/config/haswell/bli_family_haswell.h b/config/haswell/bli_family_haswell.h index 5be492e56..e4298a980 100644 --- a/config/haswell/bli_family_haswell.h +++ b/config/haswell/bli_family_haswell.h @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2019 - 2023, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -36,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_HASWELL #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS AND DEFINITIONS --------------------------- diff --git a/config/knc/bli_family_knc.h b/config/knc/bli_family_knc.h index 6f9e03e8f..26cf323c1 100644 --- a/config/knc/bli_family_knc.h +++ b/config/knc/bli_family_knc.h @@ -5,6 +5,7 @@ libraries. 
Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_KNC // -- THREADING PARAMTERS ------------------------------------------------------ diff --git a/config/knl/bli_family_knl.h b/config/knl/bli_family_knl.h index 64994cd9d..c79e23317 100644 --- a/config/knl/bli_family_knl.h +++ b/config/knl/bli_family_knl.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_KNL // -- THREADING PARAMETERS ----------------------------------------------------- diff --git a/config/penryn/bli_family_penryn.h b/config/penryn/bli_family_penryn.h index 3cbcf6d70..8fe2c959c 100644 --- a/config/penryn/bli_family_penryn.h +++ b/config/penryn/bli_family_penryn.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_PENRYN // -- MEMORY ALLOCATION -------------------------------------------------------- diff --git a/config/piledriver/bli_family_piledriver.h b/config/piledriver/bli_family_piledriver.h index 1a851bc1e..936b9385f 100644 --- a/config/piledriver/bli_family_piledriver.h +++ b/config/piledriver/bli_family_piledriver.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_PILEDRIVER // -- MEMORY ALLOCATION -------------------------------------------------------- diff --git a/config/power10/bli_family_power10.h b/config/power10/bli_family_power10.h index 432773893..2e40992f0 100644 --- a/config/power10/bli_family_power10.h +++ b/config/power10/bli_family_power10.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2019, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -32,6 +33,8 @@ */ +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_POWER10 + #define BLIS_POOL_ADDR_ALIGN_SIZE_A 4096 #define BLIS_POOL_ADDR_ALIGN_SIZE_B 4096 diff --git a/config/power7/bli_family_power7.h b/config/power7/bli_family_power7.h index 415d5faa9..26f060dd2 100644 --- a/config/power7/bli_family_power7.h +++ b/config/power7/bli_family_power7.h @@ -5,6 +5,7 @@ libraries. 
Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_POWER7 #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS ------------------------------------------- diff --git a/config/power9/bli_family_power9.h b/config/power9/bli_family_power9.h index 12b16444f..35d831aa8 100644 --- a/config/power9/bli_family_power9.h +++ b/config/power9/bli_family_power9.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2019, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -32,6 +33,8 @@ */ +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_POWER9 + #define BLIS_POOL_ADDR_ALIGN_SIZE_A 4096 #define BLIS_POOL_ADDR_ALIGN_SIZE_B 4096 diff --git a/config/sandybridge/bli_family_sandybridge.h b/config/sandybridge/bli_family_sandybridge.h index 164f8e44b..cd5145ff6 100644 --- a/config/sandybridge/bli_family_sandybridge.h +++ b/config/sandybridge/bli_family_sandybridge.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_SANDYBRIDGE #if 0 // -- LEVEL-3 MICRO-KERNEL CONSTANTS AND DEFINITIONS --------------------------- diff --git a/config/skx/bli_family_skx.h b/config/skx/bli_family_skx.h index ac9478f8b..5f4b68986 100644 --- a/config/skx/bli_family_skx.h +++ b/config/skx/bli_family_skx.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,8 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_SKX + // -- THREADING PARAMETERS ----------------------------------------------------- #define BLIS_THREAD_RATIO_M 3 diff --git a/config/steamroller/bli_family_steamroller.h b/config/steamroller/bli_family_steamroller.h index 118b5cf8b..3f77bcd77 100644 --- a/config/steamroller/bli_family_steamroller.h +++ b/config/steamroller/bli_family_steamroller.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_STEAMROLLER // -- MEMORY ALLOCATION -------------------------------------------------------- diff --git a/config/thunderx2/bli_family_thunderx2.h b/config/thunderx2/bli_family_thunderx2.h index 8e8d95409..fded0be3f 100644 --- a/config/thunderx2/bli_family_thunderx2.h +++ b/config/thunderx2/bli_family_thunderx2.h @@ -5,6 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,6 +36,7 @@ //#ifndef BLIS_FAMILY_H //#define BLIS_FAMILY_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_THUNDERX2 // -- MEMORY ALLOCATION -------------------------------------------------------- diff --git a/config/zen/bli_family_zen.h b/config/zen/bli_family_zen.h index 6ba9d39c8..670d21869 100644 --- a/config/zen/bli_family_zen.h +++ b/config/zen/bli_family_zen.h @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -36,6 +36,8 @@ #ifndef BLIS_FAMILY_ZEN_H #define BLIS_FAMILY_ZEN_H +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ZEN + #include "bli_config_zen.h" // By default, it is effective to parallelize the outer loops. 
diff --git a/config/zen2/bli_family_zen2.h b/config/zen2/bli_family_zen2.h index 655aef7c6..2534243b9 100644 --- a/config/zen2/bli_family_zen2.h +++ b/config/zen2/bli_family_zen2.h @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -33,8 +33,10 @@ */ -#ifndef BLI_FAMILY_ZEN2_ -#define BLI_FAMILY_ZEN2_ +#ifndef BLIS_FAMILY_ZEN2_H +#define BLIS_FAMILY_ZEN2_H + +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ZEN2 // By default, it is effective to parallelize the outer loops. // Setting these macros to 1 will force JR and IR inner loops diff --git a/config/zen3/bli_family_zen3.h b/config/zen3/bli_family_zen3.h index cdb19350f..7912b83e8 100644 --- a/config/zen3/bli_family_zen3.h +++ b/config/zen3/bli_family_zen3.h @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -33,8 +33,10 @@ */ -#ifndef BLI_FAMILY_ZEN3_ -#define BLI_FAMILY_ZEN3_ +#ifndef BLIS_FAMILY_ZEN3_H +#define BLIS_FAMILY_ZEN3_H + +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ZEN3 // By default, it is effective to parallelize the outer loops. 
// Setting these macros to 1 will force JR and IR inner loops diff --git a/config/zen4/bli_family_zen4.h b/config/zen4/bli_family_zen4.h index 5b82a14dc..e00bac0da 100644 --- a/config/zen4/bli_family_zen4.h +++ b/config/zen4/bli_family_zen4.h @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2021 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2021 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -32,8 +32,10 @@ */ -#ifndef BLI_FAMILY_ZEN4_ -#define BLI_FAMILY_ZEN4_ +#ifndef BLIS_FAMILY_ZEN4_H +#define BLIS_FAMILY_ZEN4_H + +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ZEN4 #include "bli_config_zen4.h" diff --git a/config/zen5/bli_family_zen5.h b/config/zen5/bli_family_zen5.h index e0889e5c6..695d978b2 100644 --- a/config/zen5/bli_family_zen5.h +++ b/config/zen5/bli_family_zen5.h @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2024 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2024 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -32,8 +32,10 @@ */ -#ifndef BLI_FAMILY_ZEN5_ -#define BLI_FAMILY_ZEN5_ +#ifndef BLIS_FAMILY_ZEN5_H +#define BLIS_FAMILY_ZEN5_H + +#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ZEN5 #include "bli_config_zen5.h" diff --git a/frame/1m/packm/bli_packm_blk_var1.c b/frame/1m/packm/bli_packm_blk_var1.c index 4b786d885..c7ddfead0 100644 --- a/frame/1m/packm/bli_packm_blk_var1.c +++ b/frame/1m/packm/bli_packm_blk_var1.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved. 
+ Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -187,7 +187,7 @@ void bli_packm_blk_var1 // For DGEMM in AVX512, scale by alpha during packing // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if ( diff --git a/frame/2/gemv/bli_gemv_unf_var1_amd.c b/frame/2/gemv/bli_gemv_unf_var1_amd.c index 9267b97d6..f47691862 100644 --- a/frame/2/gemv/bli_gemv_unf_var1_amd.c +++ b/frame/2/gemv/bli_gemv_unf_var1_amd.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -292,7 +292,7 @@ void bli_dgemv_unf_var1 the support of AVX512 or AVX2, if AVX512 - arch_id will be zen4 and zen5 or for AVX2 it will be zen3. */ - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); #if defined(BLIS_ENABLE_OPENMP) && defined(AOCL_DYNAMIC) // Setting the threshold to invoke the fast-path @@ -892,7 +892,7 @@ void bli_zgemv_unf_var1 the support of AVX512 or AVX2, if AVX512 - arch_id will be zen4 or for AVX2 it will be zen3. */ - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); switch ( arch_id ) { diff --git a/frame/2/gemv/bli_gemv_unf_var2_amd.c b/frame/2/gemv/bli_gemv_unf_var2_amd.c index 2846909b8..8483e968c 100644 --- a/frame/2/gemv/bli_gemv_unf_var2_amd.c +++ b/frame/2/gemv/bli_gemv_unf_var2_amd.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved. 
+ Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -271,7 +271,7 @@ void bli_dgemv_unf_var2 ( the support of AVX512 or AVX2, if AVX512 - arch_id will be zen4 or for AVX2 it will be zen3. */ - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // b_fuse stores the fusing factor for AXPYF kernel. dim_t b_fuse; @@ -707,7 +707,7 @@ void bli_zgemv_unf_var2 conja = bli_extract_conj(transa); // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); /* Function pointer declaration for the functions diff --git a/frame/2/trsv/bli_trsv_unf_var1_amd.c b/frame/2/trsv/bli_trsv_unf_var1_amd.c index 156abdfdf..8921987f9 100644 --- a/frame/2/trsv/bli_trsv_unf_var1_amd.c +++ b/frame/2/trsv/bli_trsv_unf_var1_amd.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -304,7 +304,7 @@ void bli_dtrsv_unf_var1 if (bli_cpuid_is_avx2fma3_supported() == TRUE) { // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); switch ( arch_id ) { diff --git a/frame/2/trsv/bli_trsv_unf_var2_amd.c b/frame/2/trsv/bli_trsv_unf_var2_amd.c index d7cd34895..134394303 100644 --- a/frame/2/trsv/bli_trsv_unf_var2_amd.c +++ b/frame/2/trsv/bli_trsv_unf_var2_amd.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved. 
+ Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -310,7 +310,7 @@ void bli_dtrsv_unf_var2 #if defined(BLIS_KERNELS_ZEN4) // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4 ) { diff --git a/frame/3/bli_l3_smart_threading.c b/frame/3/bli_l3_smart_threading.c index 591d870ab..840590446 100644 --- a/frame/3/bli_l3_smart_threading.c +++ b/frame/3/bli_l3_smart_threading.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -242,7 +242,7 @@ static err_t bli_gemm_ic_jc_optimum_sup_arch_dispatcher err_t ret_val = BLIS_FAILURE; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if ( arch_id == BLIS_ARCH_ZEN3 ) { diff --git a/frame/3/bli_l3_sup.c b/frame/3/bli_l3_sup.c index 5e7b80761..08396ebf1 100644 --- a/frame/3/bli_l3_sup.c +++ b/frame/3/bli_l3_sup.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -109,7 +109,7 @@ err_t bli_gemmsup #if defined(BLIS_FAMILY_ZEN5) || defined(BLIS_FAMILY_ZEN4) || defined(BLIS_FAMILY_AMDZEN) || defined(BLIS_FAMILY_X86_64) // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if(( arch_id == BLIS_ARCH_ZEN5 ) || ( arch_id == BLIS_ARCH_ZEN4 )) { diff --git a/frame/3/bli_l3_sup_int_amd.c b/frame/3/bli_l3_sup_int_amd.c index 2344baafe..b1bea4c43 100644 --- a/frame/3/bli_l3_sup_int_amd.c +++ b/frame/3/bli_l3_sup_int_amd.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -139,7 +139,7 @@ err_t bli_gemmsup_int //Enable packing of B matrix for double data type when dims at per //thread level are above caches and enable packing of A when transA //(RRC or CRC storage ids) to avoid rd kernels - if(bli_is_double(dt) && (bli_arch_query_id() == BLIS_ARCH_ZEN3)) + if(bli_is_double(dt) && (bli_arch_query_id_internal() == BLIS_ARCH_ZEN3)) { dim_t m_pt = (m/bli_rntm_ways_for( BLIS_MC, rntm )); dim_t n_pt = (n/bli_rntm_ways_for( BLIS_NC, rntm )); @@ -217,7 +217,7 @@ err_t bli_gemmsup_int //Enable packing of B matrix for double data type when dims at per //thread level are above caches and enable packing of A when transA //(RRC or CRC storage ids) to avoid rd kernels - if(bli_is_double(dt) && (bli_arch_query_id() == BLIS_ARCH_ZEN3)) + if(bli_is_double(dt) && (bli_arch_query_id_internal() == BLIS_ARCH_ZEN3)) { dim_t m_pt = (m/bli_rntm_ways_for( BLIS_NC, rntm )); dim_t n_pt = (n/bli_rntm_ways_for( BLIS_MC, rntm 
)); @@ -427,7 +427,7 @@ err_t bli_gemmtsup_int /* Enable packing for A matrix for higher sizes. Note that pack A * * becomes pack B inside var2m because this is transpose case*/ - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); /* Do not pack A for ZEN4 and ZEN5 because the GEMM kernels * used are column major and GEMMT kernels used are row major. * Packing matrix A makes matrix B in the GEMMT kernels column diff --git a/frame/3/gemm/bli_gemm_front_amd.c b/frame/3/gemm/bli_gemm_front_amd.c index 41b4eb5a8..cc8e31d34 100644 --- a/frame/3/gemm/bli_gemm_front_amd.c +++ b/frame/3/gemm/bli_gemm_front_amd.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -61,7 +61,7 @@ void bli_gemm_front // TODO : As part of future work, we have to retune the entry conditions // to native(ZEN3/ZEN2/ZEN), and remove the need for dynamic threading // here (GitHub Issue #114). - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if( bli_obj_is_dcomplex( c ) && ( ( arch_id == BLIS_ARCH_ZEN3 ) || ( arch_id == BLIS_ARCH_ZEN2 ) || ( arch_id == BLIS_ARCH_ZEN ) ) ) { @@ -318,7 +318,7 @@ void bli_gemm_front if ( bli_obj_dt( &c_local ) == bli_obj_dt( &a_local ) && bli_obj_dt( &c_local ) == bli_obj_dt( &b_local ) ) { - switch (bli_arch_query_id() ) + switch (bli_arch_query_id_internal() ) { case BLIS_ARCH_ZEN5: #if defined(BLIS_KERNELS_ZEN5) diff --git a/frame/3/gemm/bli_gemm_ker_var2.c b/frame/3/gemm/bli_gemm_ker_var2.c index d80a70231..a4a689ee3 100644 --- a/frame/3/gemm/bli_gemm_ker_var2.c +++ b/frame/3/gemm/bli_gemm_ker_var2.c @@ -5,7 +5,7 @@ libraries. 
Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -178,7 +178,7 @@ void bli_gemm_ker_var2 // TODO: Add macro kernel function pointer in cntx // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if ( diff --git a/frame/3/gemm/bli_tiny_gemm_amd.c b/frame/3/gemm/bli_tiny_gemm_amd.c index 25d822386..7a373af28 100644 --- a/frame/3/gemm/bli_tiny_gemm_amd.c +++ b/frame/3/gemm/bli_tiny_gemm_amd.c @@ -74,7 +74,7 @@ err_t PASTEMAC( ch, tfuncname ) \ return BLIS_FAILURE; \ \ /* Query the architecture ID */ \ - arch_t arch_id = bli_arch_query_id(); \ + arch_t arch_id = bli_arch_query_id_internal(); \ /* Declaring the object to hold the kernel information */ \ gemmtiny_ukr_info_t gemmtiny_ukr_info; \ /* Variable to flag success/failure of obtaining the kernel */ \ @@ -483,7 +483,7 @@ err_t bli_dgemm_tiny ) { // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); bool is_mt = bli_thread_get_is_parallel(); { // Pick the kernel based on the architecture ID diff --git a/frame/base/bli_arch.c b/frame/base/bli_arch.c index 3c93a4873..2f8bb172c 100644 --- a/frame/base/bli_arch.c +++ b/frame/base/bli_arch.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -73,8 +73,8 @@ static model_t actual_model_id = -1; // The arch and model ids for the currently running hardware, or the values // the user specifies to use. We initialize to -1, which will be overwritten // upon calling bli_arch_set_id(). -static arch_t arch_id = -1; -static model_t model_id = -1; +arch_t g_arch_id = -1; +model_t g_model_id = -1; // Variable used to communicate if user has set '__blis_arch_type_name' between // bli_arch_set_id() and bli_arch_check_id() @@ -91,48 +91,25 @@ bool bli_aocl_enable_instruction_query( void ) arch_t bli_arch_query_id( void ) { - bli_arch_check_id_once(); - - // Simply return the id that was previously cached. - return arch_id; + return bli_arch_query_id_internal(); } model_t bli_model_query_id( void ) { - bli_arch_check_id_once(); - - // Simply return the model_id that was previously cached. - return model_id; + return bli_model_query_id_internal(); } model_t bli_init_model_query_id( void ) { - bli_arch_set_id_once(); - - // Simply return the model_id that was previously cached. - return model_id; + return bli_init_model_query_id_internal(); } // ----------------------------------------------------------------------------- // A pthread structure used in pthread_once(). pthread_once() is guaranteed to // execute exactly once among all threads that pass in this control object. 
-static bli_pthread_once_t once_id_init = BLIS_PTHREAD_ONCE_INIT; -static bli_pthread_once_t once_id_check = BLIS_PTHREAD_ONCE_INIT; - -void bli_arch_set_id_once( void ) -{ -#ifndef BLIS_CONFIGURETIME_CPUID - bli_pthread_once( &once_id_init, bli_arch_set_id ); -#endif -} - -void bli_arch_check_id_once( void ) -{ -#ifndef BLIS_CONFIGURETIME_CPUID - bli_pthread_once( &once_id_check, bli_arch_check_id ); -#endif -} +bli_pthread_once_t once_id_init = BLIS_PTHREAD_ONCE_INIT; +bli_pthread_once_t once_id_check = BLIS_PTHREAD_ONCE_INIT; // ----------------------------------------------------------------------------- @@ -209,7 +186,7 @@ void bli_arch_set_id( void ) // bli_arch_check_id() called later. // For now, we can only be confident that req_id is in range. - arch_id = req_id; + g_arch_id = req_id; } else #endif @@ -226,101 +203,19 @@ void bli_arch_set_id( void ) defined BLIS_FAMILY_X86_64 || \ defined BLIS_FAMILY_ARM64 || \ defined BLIS_FAMILY_ARM32 - arch_id = actual_arch_id; - #endif - - // Intel microarchitectures. - #ifdef BLIS_FAMILY_SKX - arch_id = BLIS_ARCH_SKX; - #endif - #ifdef BLIS_FAMILY_KNL - arch_id = BLIS_ARCH_KNL; - #endif - #ifdef BLIS_FAMILY_KNC - arch_id = BLIS_ARCH_KNC; - #endif - #ifdef BLIS_FAMILY_HASWELL - arch_id = BLIS_ARCH_HASWELL; - #endif - #ifdef BLIS_FAMILY_SANDYBRIDGE - arch_id = BLIS_ARCH_SANDYBRIDGE; - #endif - #ifdef BLIS_FAMILY_PENRYN - arch_id = BLIS_ARCH_PENRYN; - #endif - - // AMD microarchitectures. 
- #ifdef BLIS_FAMILY_ZEN5 - arch_id = BLIS_ARCH_ZEN5; - #endif - #ifdef BLIS_FAMILY_ZEN4 - arch_id = BLIS_ARCH_ZEN4; - #endif - #ifdef BLIS_FAMILY_ZEN3 - arch_id = BLIS_ARCH_ZEN3; - #endif - #ifdef BLIS_FAMILY_ZEN2 - arch_id = BLIS_ARCH_ZEN2; - #endif - #ifdef BLIS_FAMILY_ZEN - arch_id = BLIS_ARCH_ZEN; - #endif - #ifdef BLIS_FAMILY_EXCAVATOR - arch_id = BLIS_ARCH_EXCAVATOR; - #endif - #ifdef BLIS_FAMILY_STEAMROLLER - arch_id = BLIS_ARCH_STEAMROLLER; - #endif - #ifdef BLIS_FAMILY_PILEDRIVER - arch_id = BLIS_ARCH_PILEDRIVER; - #endif - #ifdef BLIS_FAMILY_BULLDOZER - arch_id = BLIS_ARCH_BULLDOZER; - #endif - - // ARM microarchitectures. - #ifdef BLIS_FAMILY_ARMSVE - arch_id = BLIS_ARCH_ARMSVE; - #endif - #ifdef BLIS_FAMILY_A64FX - arch_id = BLIS_ARCH_A64FX; - #endif - #ifdef BLIS_FAMILY_FIRESTORM - id = BLIS_ARCH_FIRESTORM; - #endif - #ifdef BLIS_FAMILY_THUNDERX2 - arch_id = BLIS_ARCH_THUNDERX2; - #endif - #ifdef BLIS_FAMILY_CORTEXA57 - arch_id = BLIS_ARCH_CORTEXA57; - #endif - #ifdef BLIS_FAMILY_CORTEXA53 - arch_id = BLIS_ARCH_CORTEXA53; - #endif - #ifdef BLIS_FAMILY_CORTEXA15 - arch_id = BLIS_ARCH_CORTEXA15; - #endif - #ifdef BLIS_FAMILY_CORTEXA9 - arch_id = BLIS_ARCH_CORTEXA9; - #endif - - // IBM microarchitectures. - #ifdef BLIS_FAMILY_POWER10 - arch_id = BLIS_ARCH_POWER10; - #endif - #ifdef BLIS_FAMILY_POWER9 - arch_id = BLIS_ARCH_POWER9; - #endif - #ifdef BLIS_FAMILY_POWER7 - arch_id = BLIS_ARCH_POWER7; - #endif - #ifdef BLIS_FAMILY_BGQ - arch_id = BLIS_ARCH_BGQ; - #endif - - // Generic microarchitecture. - #ifdef BLIS_FAMILY_GENERIC - arch_id = BLIS_ARCH_GENERIC; + g_arch_id = actual_arch_id; + #else + #ifdef BLIS_FAMILY_TO_ARCH_VALUE + // For single sub-configuration builds, get value from header file + g_arch_id = BLIS_FAMILY_TO_ARCH_VALUE; + #else + // For "auto" build, initialize to generic as starting point. + // It will then determine the correct architecture and set + // BLIS_FAMILY_TO_ARCH_VALUE. 
Builds other than "auto" + // that lack BLIS_FAMILY_TO_ARCH_VALUE stop at the #error in + // bli_arch.h, so this is not a general fallback. + g_arch_id = BLIS_ARCH_GENERIC; + #endif #endif } @@ -336,14 +231,14 @@ void bli_arch_set_id( void ) if ( req_model != -1 ) { // BLIS_MODEL_TYPE was set. Cautiously check whether its value is usable. - // Assume here that arch_id is valid. + // Assume here that g_arch_id is valid. // If req_model was set to an invalid model_t value (ie: both outside // the range appropriate for the given architecture and not default), // set to default value and continue. if ( bli_error_checking_is_enabled() ) { - err_t e_val = bli_check_valid_model_id( arch_id, req_model ); + err_t e_val = bli_check_valid_model_id( g_arch_id, req_model ); if (e_val != BLIS_SUCCESS) { req_model = BLIS_MODEL_DEFAULT; @@ -354,7 +249,7 @@ void bli_arch_set_id( void ) // We can now be confident that req_model is in range for the // selected architecture, or it has been reset to be default. - model_id = req_model; + g_model_id = req_model; } else #endif @@ -363,25 +258,27 @@ void bli_arch_set_id( void ) { // BLIS_MODEL_TYPE was unset. Proceed with normal subconfiguration // selection behavior, based on value of architecture id selected - // above. Unlike for arch_id, we cannot simply use actual_model_id - // here, as we need to choose model_id based on the arch_id we are + // above. Unlike for g_arch_id, we cannot simply use actual_model_id + // here, as we need to choose g_model_id based on the g_arch_id we are // using, which could be different to actual_arch_id.
- model_id = bli_cpuid_query_model_id( arch_id ); + g_model_id = bli_cpuid_query_model_id( g_arch_id ); } - //printf( "blis_arch_query_id(): arch_id, model_id = %u, %u\n", arch_id, model_id ); + //printf( "blis_arch_query_id(): g_arch_id, g_model_id = %u, %u\n", g_arch_id, g_model_id ); //exit(1); } void bli_arch_check_id( void ) { - bli_arch_set_id_once(); +#ifndef BLIS_CONFIGURETIME_CPUID + bli_pthread_once( &once_id_init, bli_arch_set_id ); +#endif bool arch_not_in_build = FALSE; bool arch_reset = FALSE; arch_t orig_arch_id= req_id; - model_t orig_model_id = model_id; + model_t orig_model_id = g_model_id; // Check arch value against configured options. Only needed // if user has set it. This function will also do the @@ -424,7 +321,7 @@ void bli_arch_check_id( void ) arch_not_in_build = TRUE; arch_reset = TRUE; req_id = actual_arch_id; - model_id = actual_model_id; + g_model_id = actual_model_id; } } @@ -450,7 +347,7 @@ void bli_arch_check_id( void ) case BLIS_ARCH_HASWELL: arch_reset = TRUE; req_id = actual_arch_id; - model_id = actual_model_id; + g_model_id = actual_model_id; break; } } @@ -469,7 +366,7 @@ void bli_arch_check_id( void ) case BLIS_ARCH_SKX: arch_reset = TRUE; req_id = actual_arch_id; - model_id = actual_model_id; + g_model_id = actual_model_id; break; } } @@ -498,12 +395,12 @@ void bli_arch_check_id( void ) bli_check_error_code( e_val ); } // If BLIS_ARCH_TYPE (or renamed version of this environment variable) - // was set, we always use this value of req_id to set arch_id. + // was set, we always use this value of req_id to set g_arch_id. } // Finally, we can be confident that req_id (1) is in range and (2) // refers to a context that has been initialized. - arch_id = req_id; + g_arch_id = req_id; } #endif @@ -514,18 +411,18 @@ void bli_arch_check_id( void ) if ( req_id == -1 && aocl_e_i) { // AOCL_ENABLE_INSTRUCTIONS was set to an invalid value - // normal system arch_id was used instead. 
- if ( model_id == BLIS_MODEL_DEFAULT ) + // normal system g_arch_id was used instead. + if ( g_model_id == BLIS_MODEL_DEFAULT ) { fprintf( stderr, "libblis: AOCL_ENABLE_INSTRUCTIONS env var was set to an invalid value.\n" "libblis: Selecting system default sub-configuration '%s'.\n", - bli_arch_string( arch_id ) ); + bli_arch_string( g_arch_id ) ); } else { fprintf( stderr, "libblis: AOCL_ENABLE_INSTRUCTIONS env var was set to an invalid value.\n" "libblis: Selecting system default sub-configuration '%s', model '%s'.\n", - bli_arch_string( arch_id ), bli_model_string( model_id ) ); + bli_arch_string( g_arch_id ), bli_model_string( g_model_id ) ); } } else if ( arch_not_in_build ) @@ -534,13 +431,13 @@ void bli_arch_check_id( void ) { fprintf( stderr, "libblis: Sub-configuration '%s' is not implemented in this build.\n" "libblis: Selecting system default sub-configuration '%s'.\n", - bli_arch_string( orig_arch_id ), bli_arch_string( arch_id ) ); + bli_arch_string( orig_arch_id ), bli_arch_string( g_arch_id ) ); } else { fprintf( stderr, "libblis: Sub-configuration '%s', model '%s' is not implemented in this build.\n" "libblis: Selecting system default sub-configuration '%s', model '%s'.\n", - bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( arch_id ), bli_model_string( model_id ) ); + bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( g_arch_id ), bli_model_string( g_model_id ) ); } } else if ( arch_reset ) @@ -549,27 +446,27 @@ void bli_arch_check_id( void ) { fprintf( stderr, "libblis: Sub-configuration '%s' is not supported on this system.\n" "libblis: Selecting system default sub-configuration '%s'.\n", - bli_arch_string( orig_arch_id ), bli_arch_string( arch_id ) ); + bli_arch_string( orig_arch_id ), bli_arch_string( g_arch_id ) ); } else { fprintf( stderr, "libblis: Sub-configuration '%s', model '%s' is not supported on this system.\n" "libblis: Selecting system default sub-configuration 
'%s', model '%s'.\n", - bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( arch_id ), bli_model_string( model_id ) ); + bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( g_arch_id ), bli_model_string( g_model_id ) ); } } else { - if ( model_id == BLIS_MODEL_DEFAULT ) + if ( g_model_id == BLIS_MODEL_DEFAULT ) { #ifdef DISABLE_BLIS_ARCH_TYPE fprintf( stderr, "libblis: Selecting sub-configuration '%s'.\n" "libblis: User control of sub-configuration using AOCL_ENABLE_INSTRUCTIONS\n" "libblis: or using "__blis_arch_type_name" and "__blis_model_type_name" is disabled.\n", - bli_arch_string( arch_id ) ); + bli_arch_string( g_arch_id ) ); #else fprintf( stderr, "libblis: Selecting sub-configuration '%s'.\n", - bli_arch_string( arch_id ) ); + bli_arch_string( g_arch_id ) ); #endif } else @@ -578,10 +475,10 @@ void bli_arch_check_id( void ) fprintf( stderr, "libblis: Selecting sub-configuration '%s', model '%s'.\n" "libblis: User control of sub-configuration using AOCL_ENABLE_INSTRUCTIONS\n" "libblis: or using "__blis_arch_type_name" and "__blis_model_type_name" is disabled.\n", - bli_arch_string( arch_id ), bli_model_string( model_id ) ); + bli_arch_string( g_arch_id ), bli_model_string( g_model_id ) ); #else fprintf( stderr, "libblis: Selecting sub-configuration '%s', model '%s'.\n", - bli_arch_string( arch_id ), bli_model_string( model_id ) ); + bli_arch_string( g_arch_id ), bli_model_string( g_model_id ) ); #endif } } @@ -601,7 +498,7 @@ void bli_arch_check_id( void ) #endif } - //printf( "blis_arch_check_id(): arch_id, model_id = %u, %u\n", arch_id, model_id ); + //printf( "blis_arch_check_id(): g_arch_id, g_model_id = %u, %u\n", g_arch_id, g_model_id ); //exit(1); } diff --git a/frame/base/bli_arch.h b/frame/base/bli_arch.h index e944fb964..4659c90dc 100644 --- a/frame/base/bli_arch.h +++ b/frame/base/bli_arch.h @@ -5,6 +5,7 @@ libraries. 
Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -35,25 +36,89 @@ #ifndef BLIS_ARCH_H #define BLIS_ARCH_H -BLIS_EXPORT_BLIS arch_t bli_arch_query_id( void ); BLIS_EXPORT_BLIS bool bli_aocl_enable_instruction_query( void ); -void bli_arch_set_id_once( void ); -void bli_arch_set_id( void ); +BLIS_EXPORT_BLIS arch_t bli_arch_query_id( void ); -void bli_arch_check_id_once( void ); -void bli_arch_check_id( void ); +BLIS_EXPORT_BLIS model_t bli_model_query_id( void ); +BLIS_EXPORT_BLIS model_t bli_init_model_query_id( void ); BLIS_EXPORT_BLIS char* bli_arch_string( arch_t id ); +BLIS_EXPORT_BLIS char* bli_model_string( model_t id ); + +extern arch_t g_arch_id; +extern model_t g_model_id; + +extern bli_pthread_once_t once_id_check; +extern bli_pthread_once_t once_id_init; + +void bli_arch_set_id( void ); + +void bli_arch_check_id( void ); void bli_arch_set_logging( bool dolog ); bool bli_arch_get_logging( void ); void bli_arch_log( char*, ... ); -BLIS_EXPORT_BLIS model_t bli_model_query_id( void ); -BLIS_EXPORT_BLIS model_t bli_init_model_query_id( void ); +BLIS_INLINE arch_t bli_arch_query_id_internal( void ) +{ -BLIS_EXPORT_BLIS char* bli_model_string( model_t id ); +#if defined BLIS_FAMILY_INTEL64 || \ + defined BLIS_FAMILY_AMDZEN || \ + defined BLIS_FAMILY_AMD64_LEGACY || \ + defined BLIS_FAMILY_X86_64 || \ + defined BLIS_FAMILY_ARM64 || \ + defined BLIS_FAMILY_ARM32 + + // For builds with multiple sub-configurations use the global value + // that will reflect dynamic dispatch, subject to any user override + // via environment variables. + #ifndef BLIS_CONFIGURETIME_CPUID + bli_pthread_once( &once_id_check, bli_arch_check_id ); + #endif + // Simply return the id that was previously cached. 
+ return g_arch_id; + +#else + + #if defined BLIS_FAMILY_TO_ARCH_VALUE + // For single sub-configuration builds, get value from header file + arch_t l_arch_id = BLIS_FAMILY_TO_ARCH_VALUE; + #elif defined BLIS_CONFIGURETIME_CPUID + // For "auto" build, BLIS_FAMILY_TO_ARCH_VALUE is not defined yet, + // so use generic as starting point for architecture detection. + // BLIS will then determine the correct architecture and get + // the correct BLIS_FAMILY_TO_ARCH_VALUE from the relevant + // sub-configuration header file. + arch_t l_arch_id = BLIS_ARCH_GENERIC; + #else + // No fallback if BLIS_FAMILY_TO_ARCH_VALUE is not set in + // the relevant config bli_family header file + #error "BLIS_FAMILY_TO_ARCH_VALUE not defined in relevant config bli_family header file" + #endif + return l_arch_id; + +#endif + +} + +BLIS_INLINE model_t bli_model_query_id_internal( void ) +{ +#ifndef BLIS_CONFIGURETIME_CPUID + bli_pthread_once( &once_id_check, bli_arch_check_id ); +#endif + // Simply return the model_id that was previously cached. + return g_model_id; +} + +BLIS_INLINE model_t bli_init_model_query_id_internal( void ) +{ +#ifndef BLIS_CONFIGURETIME_CPUID + bli_pthread_once( &once_id_init, bli_arch_set_id ); +#endif + // Simply return the model_id that was previously cached. + return g_model_id; +} #endif diff --git a/frame/base/bli_gks.c b/frame/base/bli_gks.c index cfe0e839e..8a6c4ff38 100644 --- a/frame/base/bli_gks.c +++ b/frame/base/bli_gks.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -519,10 +519,10 @@ cntx_t* bli_gks_query_nat_cntx( void ) // Return the address of the native context for the architecture id // corresponding to the current hardware, as determined by - // bli_arch_query_id(). + // bli_arch_query_id_internal(). // Query the architecture id. - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Use the architecture id to look up a pointer to its context. cntx_t* cntx = bli_gks_lookup_nat_cntx( arch_id ); @@ -538,7 +538,7 @@ cntx_t* bli_gks_query_cntx_noinit( void ) // does not call bli_init_once(). // Query the architecture id. - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Use the architecture id to look up a pointer to its context. cntx_t* cntx = bli_gks_lookup_nat_cntx( arch_id ); @@ -566,7 +566,7 @@ cntx_t* bli_gks_query_ind_cntx // Return the address of a context that will be suited for executing a // level-3 operation via the requested induced method (and datatype) for // the architecture id corresponding to the current hardware, as - // determined by bli_arch_query_id(). + // determined by bli_arch_query_id_internal(). // This function is called when a level-3 operation via induced method is // called, e.g. bli_gemm1m(). If this is the first time that induced method @@ -577,7 +577,7 @@ cntx_t* bli_gks_query_ind_cntx // ensure thread safety and deterministic behavior. // Query the architecture id. - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Sanity check: verify that the arch_t id is valid. if ( bli_error_checking_is_enabled() ) @@ -658,7 +658,7 @@ void bli_gks_init_ref_cntx ) { // Query the architecture id. - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Sanity check: verify that the arch_t id is valid. 
if ( bli_error_checking_is_enabled() ) @@ -687,7 +687,7 @@ bool bli_gks_cntx_l3_nat_ukr_is_ref cntx_t ref_cntx; // Initialize a context with reference kernels for the arch_t id queried - // via bli_arch_query_id(). + // via bli_arch_query_id_internal(). bli_gks_init_ref_cntx( &ref_cntx ); // Query each context for the micro-kernel function pointer for the @@ -779,7 +779,7 @@ kimpl_t bli_gks_l3_ukr_impl_type( l3ukr_t ukr, ind_t method, num_t dt ) cntx_t ref_cntx_l; // Query the architecture id. - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Sanity check: verify that the arch_t id is valid. if ( bli_error_checking_is_enabled() ) diff --git a/frame/base/bli_rntm.c b/frame/base/bli_rntm.c index 975a1a3ae..b50f326ca 100644 --- a/frame/base/bli_rntm.c +++ b/frame/base/bli_rntm.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2021 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2021 - 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -557,7 +557,7 @@ void bli_nthreads_optimum( dim_t k = bli_obj_width_after_trans(a); // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if(arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) { @@ -1209,7 +1209,7 @@ void bli_nthreads_optimum( dim_t k = bli_obj_width_after_trans(a); // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if( arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4 ) { @@ -1643,7 +1643,7 @@ void bli_nthreads_optimum( dim_t k = bli_obj_width_after_trans(a); // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if( arch_id == BLIS_ARCH_ZEN5 ) { @@ -2255,7 +2255,7 @@ void bli_nthreads_optimum( dim_t n = bli_obj_width(c); // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if (arch_id == BLIS_ARCH_ZEN5) { diff --git a/frame/compat/bla_amax_amd.c b/frame/compat/bla_amax_amd.c index acf25b894..2a71d6f11 100644 --- a/frame/compat/bla_amax_amd.c +++ b/frame/compat/bla_amax_amd.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -303,7 +303,7 @@ f77_int idamax_blis_impl cntx_t* cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API diff --git a/frame/compat/bla_axpy_amd.c b/frame/compat/bla_axpy_amd.c index b5957b8ce..59ee40b59 100644 --- a/frame/compat/bla_axpy_amd.c +++ b/frame/compat/bla_axpy_amd.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -200,7 +200,7 @@ void saxpy_blis_impl cntx_t *cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API @@ -341,7 +341,7 @@ void daxpy_blis_impl cntx_t *cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API @@ -724,7 +724,7 @@ void zaxpy_blis_impl cntx_t *cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API diff --git a/frame/compat/bla_copy_amd.c b/frame/compat/bla_copy_amd.c index b40b055ba..e7e6b8b02 100644 --- a/frame/compat/bla_copy_amd.c +++ b/frame/compat/bla_copy_amd.c @@ -5,7 +5,7 @@ libraries. 
Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -168,7 +168,7 @@ void scopy_blis_impl cntx_t *cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API @@ -290,7 +290,7 @@ void dcopy_blis_impl cntx_t *cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API @@ -552,7 +552,7 @@ void zcopy_blis_impl cntx_t *cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API diff --git a/frame/compat/bla_dot_amd.c b/frame/compat/bla_dot_amd.c index 9f16edd91..25ee82b9c 100644 --- a/frame/compat/bla_dot_amd.c +++ b/frame/compat/bla_dot_amd.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -189,7 +189,7 @@ float sdot_blis_impl cntx_t *cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API @@ -334,7 +334,7 @@ double ddot_blis_impl cntx_t *cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API @@ -817,7 +817,7 @@ dcomplex zdotu_blis_impl cntx_t *cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API @@ -1247,7 +1247,7 @@ dcomplex zdotc_blis_impl cntx_t *cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API diff --git a/frame/compat/bla_gemm_amd.c b/frame/compat/bla_gemm_amd.c index de38dc4b9..07e36614f 100644 --- a/frame/compat/bla_gemm_amd.c +++ b/frame/compat/bla_gemm_amd.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -42,7 +42,7 @@ #define GEMM_BLIS_IMPL(ch, blasname) \ PASTEF77S(ch,blasname) ( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ - arch_t arch_id = bli_arch_query_id(); \ + arch_t arch_id = bli_arch_query_id_internal(); \ if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \ { \ bli_zero_zmm(); \ @@ -684,7 +684,7 @@ void dgemm_blis_impl err_t k1_status = BLIS_FAILURE; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if ( arch_id == BLIS_ARCH_ZEN || arch_id == BLIS_ARCH_ZEN2 || arch_id == BLIS_ARCH_ZEN3 ) @@ -821,7 +821,7 @@ void dgemm_blis_impl bool entry_to_small = false; /* AVX512 GEMM tiny path is performant enough to handle small skinny inputs on ZEN4/5 */ /* AVX2 gemm_small path is invoked on ZEN/2/3 only */ - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if( arch_id == BLIS_ARCH_ZEN3 || arch_id == BLIS_ARCH_ZEN2 || arch_id == BLIS_ARCH_ZEN ) { @@ -929,7 +929,7 @@ void dgemm_ { dgemm_blis_impl(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); #if defined(BLIS_KERNELS_ZEN4) - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) { bli_zero_zmm(); @@ -1179,7 +1179,7 @@ void zgemm_blis_impl err_t k1_status = BLIS_FAILURE; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if ( arch_id == BLIS_ARCH_ZEN || arch_id == BLIS_ARCH_ZEN2 || arch_id == BLIS_ARCH_ZEN3 ) @@ -1335,7 +1335,7 @@ void zgemm_blis_impl #endif /* Query the architecture ID */ - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); /* Boolean to track the entry to small path */ bool entry_to_small = false; @@ -1491,7 +1491,7 @@ 
void zgemm_ { zgemm_blis_impl(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); #if defined(BLIS_KERNELS_ZEN4) - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) { bli_zero_zmm(); @@ -1741,7 +1741,7 @@ void cgemm_blis_impl if( ( k0 == 1 ) && bli_is_notrans( blis_transa ) && bli_is_notrans( blis_transb ) ) { // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if ( ( arch_id == BLIS_ARCH_ZEN4 ) || ( arch_id == BLIS_ARCH_ZEN5 ) ) { @@ -1887,7 +1887,7 @@ void cgemm_ { cgemm_blis_impl(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); #if defined(BLIS_KERNELS_ZEN4) - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) { bli_zero_zmm(); @@ -2170,7 +2170,7 @@ void sgemm_ { sgemm_blis_impl(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc); #if defined(BLIS_KERNELS_ZEN4) - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) { bli_zero_zmm(); @@ -2340,7 +2340,7 @@ void dzgemm_ { dzgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); #if defined(BLIS_KERNELS_ZEN4) - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) { bli_zero_zmm(); diff --git a/frame/compat/bla_hemm.c b/frame/compat/bla_hemm.c index f9b2dc199..04c5692a9 100644 --- a/frame/compat/bla_hemm.c +++ b/frame/compat/bla_hemm.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -44,7 +44,7 @@ #define HEMM_BLIS_IMPL(ch, blasname) \ PASTEF77S(ch,blasname) ( side, uploa, m, n, alpha, a, lda, b, ldb, beta, c, ldc ); \ - arch_t arch_id = bli_arch_query_id(); \ + arch_t arch_id = bli_arch_query_id_internal(); \ if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \ { \ bli_zero_zmm(); \ diff --git a/frame/compat/bla_her2k.c b/frame/compat/bla_her2k.c index 648c2dd49..7ebfaa976 100644 --- a/frame/compat/bla_her2k.c +++ b/frame/compat/bla_her2k.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -44,7 +44,7 @@ #define HER2K_BLIS_IMPL(ch, blasname) \ PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ - arch_t arch_id = bli_arch_query_id(); \ + arch_t arch_id = bli_arch_query_id_internal(); \ if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \ { \ bli_zero_zmm(); \ diff --git a/frame/compat/bla_herk.c b/frame/compat/bla_herk.c index c83dd312c..c6ab671dd 100644 --- a/frame/compat/bla_herk.c +++ b/frame/compat/bla_herk.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -44,7 +44,7 @@ #define HERK_BLIS_IMPL(ch, blasname) \ PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, beta, c, ldc ); \ - arch_t arch_id = bli_arch_query_id(); \ + arch_t arch_id = bli_arch_query_id_internal(); \ if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \ { \ bli_zero_zmm(); \ diff --git a/frame/compat/bla_scal_amd.c b/frame/compat/bla_scal_amd.c index 7abce85b5..41f2d65c0 100644 --- a/frame/compat/bla_scal_amd.c +++ b/frame/compat/bla_scal_amd.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -157,7 +157,7 @@ void sscal_blis_impl cntx_t *cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API @@ -252,7 +252,7 @@ void dscal_blis_impl cntx_t *cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API @@ -455,7 +455,7 @@ void zdscal_blis_impl cntx_t *cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API @@ -626,7 +626,7 @@ void cscal_blis_impl cntx_t* cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration 
for the function // that will be used by this API @@ -721,7 +721,7 @@ void zscal_blis_impl cntx_t* cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API diff --git a/frame/compat/bla_symm.c b/frame/compat/bla_symm.c index d72c3497c..737aa060b 100644 --- a/frame/compat/bla_symm.c +++ b/frame/compat/bla_symm.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -44,7 +44,7 @@ #define SYMM_BLIS_IMPL(ch, blasname) \ PASTEF77S(ch,blasname) ( side, uploa, m, n, alpha, a, lda, b, ldb, beta, c, ldc ); \ - arch_t arch_id = bli_arch_query_id(); \ + arch_t arch_id = bli_arch_query_id_internal(); \ if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \ { \ bli_zero_zmm(); \ diff --git a/frame/compat/bla_syr2k.c b/frame/compat/bla_syr2k.c index 807ec594e..683c4a427 100644 --- a/frame/compat/bla_syr2k.c +++ b/frame/compat/bla_syr2k.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin. - Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -44,7 +44,7 @@ #define SYR2K_BLIS_IMPL(ch, blasname) \ PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ - arch_t arch_id = bli_arch_query_id(); \ + arch_t arch_id = bli_arch_query_id_internal(); \ if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \ { \ bli_zero_zmm(); \ diff --git a/frame/compat/bla_syrk.c b/frame/compat/bla_syrk.c index 27a508c9d..2f4e092ae 100644 --- a/frame/compat/bla_syrk.c +++ b/frame/compat/bla_syrk.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin. - Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -44,7 +44,7 @@ #define SYRK_BLIS_IMPL(ch, blasname) \ PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, beta, c, ldc ); \ - arch_t arch_id = bli_arch_query_id(); \ + arch_t arch_id = bli_arch_query_id_internal(); \ if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \ { \ bli_zero_zmm(); \ diff --git a/frame/compat/bla_trmm.c b/frame/compat/bla_trmm.c index 767fa45c8..e4e7d6c6f 100644 --- a/frame/compat/bla_trmm.c +++ b/frame/compat/bla_trmm.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin. - Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -43,7 +43,7 @@ #define TRMM_BLIS_IMPL(ch, blasname) \ PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \ - arch_t arch_id = bli_arch_query_id(); \ + arch_t arch_id = bli_arch_query_id_internal(); \ if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \ { \ bli_zero_zmm(); \ diff --git a/frame/compat/bla_trsm_amd.c b/frame/compat/bla_trsm_amd.c index 797ea84d7..98ab4072d 100644 --- a/frame/compat/bla_trsm_amd.c +++ b/frame/compat/bla_trsm_amd.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -44,7 +44,7 @@ #define TRSM_BLIS_IMPL(ch, blasname) \ PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \ - arch_t arch_id = bli_arch_query_id(); \ + arch_t arch_id = bli_arch_query_id_internal(); \ if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \ { \ bli_zero_zmm(); \ @@ -847,7 +847,7 @@ void strsm_ { strsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); #if defined(BLIS_KERNELS_ZEN4) - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) { bli_zero_zmm(); @@ -1139,7 +1139,7 @@ void dtrsm_blis_impl dtrsm_small_ker_ft trsm_ker_ptr = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // dimensions of triangular matrix // for left variants, dim_a is m0, @@ -1351,7 +1351,7 @@ void dtrsm_ { dtrsm_blis_impl ( side, uploa, transa, diaga, m, n, 
alpha, a, lda, b, ldb ); #if defined(BLIS_KERNELS_ZEN4) - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) { bli_zero_zmm(); @@ -1706,7 +1706,7 @@ void ztrsm_blis_impl ztrsm_small_ker_ft trsm_ker_ptr = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); bool is_parallel = bli_thread_get_is_parallel(); @@ -1895,7 +1895,7 @@ void ztrsm_ { ztrsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); #if defined(BLIS_KERNELS_ZEN4) - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) { bli_zero_zmm(); @@ -2306,7 +2306,7 @@ void ctrsm_ { ctrsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); #if defined(BLIS_KERNELS_ZEN4) - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) { bli_zero_zmm(); diff --git a/frame/compat/extra/bla_axpby_amd.c b/frame/compat/extra/bla_axpby_amd.c index 89aad1497..6ab27e79c 100644 --- a/frame/compat/extra/bla_axpby_amd.c +++ b/frame/compat/extra/bla_axpby_amd.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2024 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2024 - 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -176,7 +176,7 @@ void saxpby_blis_impl cntx_t *cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API @@ -308,7 +308,7 @@ void daxpby_blis_impl cntx_t *cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API @@ -445,7 +445,7 @@ void caxpby_blis_impl cntx_t *cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API @@ -577,7 +577,7 @@ void zaxpby_blis_impl cntx_t *cntx = NULL; // Query the architecture ID - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); // Function pointer declaration for the function // that will be used by this API diff --git a/frame/compat/extra/bla_gemmt.c b/frame/compat/extra/bla_gemmt.c index 1666db395..50beda224 100644 --- a/frame/compat/extra/bla_gemmt.c +++ b/frame/compat/extra/bla_gemmt.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -42,7 +42,7 @@ #define GEMMT_BLIS_IMPL(ch, blasname) \ PASTEF77S(ch,blasname) ( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \ - arch_t arch_id = bli_arch_query_id(); \ + arch_t arch_id = bli_arch_query_id_internal(); \ if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \ { \ bli_zero_zmm(); \ diff --git a/frame/util/bli_util_unb_var1.c b/frame/util/bli_util_unb_var1.c index 187fe6b55..2272cdb26 100644 --- a/frame/util/bli_util_unb_var1.c +++ b/frame/util/bli_util_unb_var1.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -322,7 +322,7 @@ void bli_cnormfv_unb_var1 inc_t incx_buf = incx; // Querying the architecture ID to deploy the appropriate kernel - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); switch ( arch_id ) { case BLIS_ARCH_ZEN5: @@ -460,7 +460,7 @@ void bli_znormfv_unb_var1 dim_t simd_factor = 1; #endif - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); switch ( arch_id ) { case BLIS_ARCH_ZEN5: @@ -939,7 +939,7 @@ void bli_snormfv_unb_var1 inc_t incx_buf = incx; // Querying the architecture ID to deploy the appropriate kernel - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); switch ( arch_id ) { case BLIS_ARCH_ZEN5: @@ -1084,7 +1084,7 @@ void bli_dnormfv_unb_var1 dim_t nt_ideal = -1; #endif - arch_t arch_id = bli_arch_query_id(); + arch_t arch_id = bli_arch_query_id_internal(); switch ( arch_id ) { case BLIS_ARCH_ZEN5: diff --git 
a/kernels/zen4/1f/bli_axpyf_zen4_int.c b/kernels/zen4/1f/bli_axpyf_zen4_int.c index 5bacbd38f..232bd7c4b 100644 --- a/kernels/zen4/1f/bli_axpyf_zen4_int.c +++ b/kernels/zen4/1f/bli_axpyf_zen4_int.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2024 - 2025, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2024 - 2026, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -377,7 +377,7 @@ void bli_daxpyf_zen4_int_32_mt BLIS_AXPYF_KER, BLIS_DOUBLE, BLIS_DOUBLE, - bli_arch_query_id(), + bli_arch_query_id_internal(), m, &nt ); diff --git a/kernels/zen4/2/bli_gemv_n_zen4_int.c b/kernels/zen4/2/bli_gemv_n_zen4_int.c index 7c185034b..0bd69e7d4 100644 --- a/kernels/zen4/2/bli_gemv_n_zen4_int.c +++ b/kernels/zen4/2/bli_gemv_n_zen4_int.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2025-26, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2025 - 2026, Advanced Micro Devices, Inc. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -567,7 +567,7 @@ GENT_GEMV_CALLER(double, d, 40, 2, n, st); BLIS_GEMV_KER, \ PASTEMAC(ch,type), \ BLIS_NO_TRANSPOSE, \ - bli_arch_query_id(), \ + bli_arch_query_id_internal(), \ m, \ n, \ &nt \ @@ -1034,7 +1034,7 @@ GENT_GEMV_CALLER(double, d, 40, 8, m); BLIS_GEMV_KER, BLIS_DOUBLE, //PASTEMAC(d,type), BLIS_NO_TRANSPOSE, - bli_arch_query_id(), + bli_arch_query_id_internal(), m, n, &nt @@ -1190,7 +1190,7 @@ void bli_dgemv_m_zen4_int_40x8_mt_Mdiv BLIS_GEMV_KER, PASTEMAC(d,type), BLIS_NO_TRANSPOSE, - bli_arch_query_id(), + bli_arch_query_id_internal(), m, n, &nt @@ -1313,7 +1313,7 @@ void bli_dgemv_m_zen4_int_40x8_mt_Ndiv BLIS_GEMV_KER, PASTEMAC(d,type), BLIS_NO_TRANSPOSE, - bli_arch_query_id(), + bli_arch_query_id_internal(), m, n, &nt