Optimize bli_arch_query_id and related functions

bli_arch_query_id() is used to select kernels in optimized BLAS APIs. The
previous implementation incurred the overhead of multiple function calls. This
overhead has been reduced by:
- Changing the function to be defined in a header file so it can be inlined.
- Avoiding the call to bli_arch_check_id_once, which was only a wrapper around
  bli_pthread_once. bli_pthread_once is now called directly instead.
- For builds with a single BLIS sub-configuration, the correct arch_id is taken
  directly from a header file in the corresponding config subdirectory,
  avoiding the bli_pthread_once call and making the value explicit at
  compile time, which may enable additional optimizations.

To enable these changes, the variables arch_id and model_id defined in
frame/base/bli_arch.c are no longer static, as they must be accessed in multiple
files (i.e. they are now global variables). They have been renamed to g_arch_id
and g_model_id to distinguish them from any locally defined arch_id or model_id
variables.
This commit is contained in:
Smyth, Edward
2026-02-04 13:16:46 +00:00
committed by GitHub
parent ebf8721a5c
commit 8310b2d5d3
66 changed files with 318 additions and 301 deletions

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -235,7 +235,7 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16)
* as the tiny path for BF16->FP32 is not available. Hence the arch_id also has to be
* verified here.
*/
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if( ( bli_cpuid_is_avx512bf16_supported() == TRUE ) && ( ( arch_id == BLIS_ARCH_ZEN4 ) || ( arch_id == BLIS_ARCH_ZEN5 ) ) && ( is_single_thread( &rntm_g ) == TRUE) )
{
if( ( is_row_major == TRUE ) &&

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -240,7 +240,7 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32)
* as the tiny path for BF16->FP32 is not available. Hence the arch_id also has to be
* verified here.
*/
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if( ( bli_cpuid_is_avx512bf16_supported() == TRUE ) && ( ( arch_id == BLIS_ARCH_ZEN4 ) || ( arch_id == BLIS_ARCH_ZEN5 ) ) &&
( is_tiny_input_bf16of32( m, n, k, lcntx_g ) == TRUE ) &&
( is_single_thread( &rntm_g ) == TRUE) &&

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -102,7 +102,7 @@ static bli_pthread_once_t once_check_lpgemm_func_map_init = BLIS_PTHREAD_ONCE_IN
static void _lpgemm_init_enable_arch()
{
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
bool enbl_instr = bli_aocl_enable_instruction_query();
if ( ( enbl_instr == TRUE ) &&

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_A64FX
// -- MEMORY ALLOCATION --------------------------------------------------------

View File

@@ -35,7 +35,6 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
// -- MEMORY ALLOCATION --------------------------------------------------------
#define BLIS_SIMD_ALIGN_SIZE 16

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ARMSVE
// -- MEMORY ALLOCATION --------------------------------------------------------

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_BGQ
#undef restrict

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_BULLDOZER
#if 0

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_CORTEXA15
// -- MEMORY ALLOCATION --------------------------------------------------------

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -32,6 +33,7 @@
*/
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_CORTEXA53
// -- MEMORY ALLOCATION --------------------------------------------------------

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_CORTEXA57
// -- MEMORY ALLOCATION --------------------------------------------------------

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_CORTEXA9
// -- MEMORY ALLOCATION --------------------------------------------------------

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_EXCAVATOR
// -- MEMORY ALLOCATION --------------------------------------------------------

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_FIRESTORM
// -- MEMORY ALLOCATION --------------------------------------------------------

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2025 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,8 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_GENERIC
//#endif

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019 - 2023, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -36,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_HASWELL
#if 0
// -- LEVEL-3 MICRO-KERNEL CONSTANTS AND DEFINITIONS ---------------------------

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_KNC
// -- THREADING PARAMETERS -----------------------------------------------------

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_KNL
// -- THREADING PARAMETERS -----------------------------------------------------

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_PENRYN
// -- MEMORY ALLOCATION --------------------------------------------------------

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_PILEDRIVER
// -- MEMORY ALLOCATION --------------------------------------------------------

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2019, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -32,6 +33,8 @@
*/
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_POWER10
#define BLIS_POOL_ADDR_ALIGN_SIZE_A 4096
#define BLIS_POOL_ADDR_ALIGN_SIZE_B 4096

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_POWER7
#if 0
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2019, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -32,6 +33,8 @@
*/
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_POWER9
#define BLIS_POOL_ADDR_ALIGN_SIZE_A 4096
#define BLIS_POOL_ADDR_ALIGN_SIZE_B 4096

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_SANDYBRIDGE
#if 0
// -- LEVEL-3 MICRO-KERNEL CONSTANTS AND DEFINITIONS ---------------------------

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,8 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_SKX
// -- THREADING PARAMETERS -----------------------------------------------------
#define BLIS_THREAD_RATIO_M 3

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_STEAMROLLER
// -- MEMORY ALLOCATION --------------------------------------------------------

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,6 +36,7 @@
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_THUNDERX2
// -- MEMORY ALLOCATION --------------------------------------------------------

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -36,6 +36,8 @@
#ifndef BLIS_FAMILY_ZEN_H
#define BLIS_FAMILY_ZEN_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ZEN
#include "bli_config_zen.h"
// By default, it is effective to parallelize the outer loops.

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -33,8 +33,10 @@
*/
#ifndef BLI_FAMILY_ZEN2_
#define BLI_FAMILY_ZEN2_
#ifndef BLIS_FAMILY_ZEN2_H
#define BLIS_FAMILY_ZEN2_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ZEN2
// By default, it is effective to parallelize the outer loops.
// Setting these macros to 1 will force JR and IR inner loops

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -33,8 +33,10 @@
*/
#ifndef BLI_FAMILY_ZEN3_
#define BLI_FAMILY_ZEN3_
#ifndef BLIS_FAMILY_ZEN3_H
#define BLIS_FAMILY_ZEN3_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ZEN3
// By default, it is effective to parallelize the outer loops.
// Setting these macros to 1 will force JR and IR inner loops

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2021 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2021 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -32,8 +32,10 @@
*/
#ifndef BLI_FAMILY_ZEN4_
#define BLI_FAMILY_ZEN4_
#ifndef BLIS_FAMILY_ZEN4_H
#define BLIS_FAMILY_ZEN4_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ZEN4
#include "bli_config_zen4.h"

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2024 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2024 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -32,8 +32,10 @@
*/
#ifndef BLI_FAMILY_ZEN5_
#define BLI_FAMILY_ZEN5_
#ifndef BLIS_FAMILY_ZEN5_H
#define BLIS_FAMILY_ZEN5_H
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ZEN5
#include "bli_config_zen5.h"

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -187,7 +187,7 @@ void bli_packm_blk_var1
// For DGEMM in AVX512, scale by alpha during packing
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if
(

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -292,7 +292,7 @@ void bli_dgemv_unf_var1
the support of AVX512 or AVX2, if AVX512 - arch_id will be zen4
and zen5 or for AVX2 it will be zen3.
*/
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
#if defined(BLIS_ENABLE_OPENMP) && defined(AOCL_DYNAMIC)
// Setting the threshold to invoke the fast-path
@@ -892,7 +892,7 @@ void bli_zgemv_unf_var1
the support of AVX512 or AVX2, if AVX512 - arch_id will be zen4
or for AVX2 it will be zen3.
*/
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
switch ( arch_id )
{

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -271,7 +271,7 @@ void bli_dgemv_unf_var2 (
the support of AVX512 or AVX2, if AVX512 - arch_id will be zen4
or for AVX2 it will be zen3.
*/
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// b_fuse stores the fusing factor for AXPYF kernel.
dim_t b_fuse;
@@ -707,7 +707,7 @@ void bli_zgemv_unf_var2
conja = bli_extract_conj(transa);
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
/*
Function pointer declaration for the functions

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -304,7 +304,7 @@ void bli_dtrsv_unf_var1
if (bli_cpuid_is_avx2fma3_supported() == TRUE) {
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
switch ( arch_id )
{

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -310,7 +310,7 @@ void bli_dtrsv_unf_var2
#if defined(BLIS_KERNELS_ZEN4)
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4 )
{

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -242,7 +242,7 @@ static err_t bli_gemm_ic_jc_optimum_sup_arch_dispatcher
err_t ret_val = BLIS_FAILURE;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if ( arch_id == BLIS_ARCH_ZEN3 )
{

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -109,7 +109,7 @@ err_t bli_gemmsup
#if defined(BLIS_FAMILY_ZEN5) || defined(BLIS_FAMILY_ZEN4) || defined(BLIS_FAMILY_AMDZEN) || defined(BLIS_FAMILY_X86_64)
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if(( arch_id == BLIS_ARCH_ZEN5 ) || ( arch_id == BLIS_ARCH_ZEN4 ))
{

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -139,7 +139,7 @@ err_t bli_gemmsup_int
//Enable packing of B matrix for double data type when dims at per
//thread level are above caches and enable packing of A when transA
//(RRC or CRC storage ids) to avoid rd kernels
if(bli_is_double(dt) && (bli_arch_query_id() == BLIS_ARCH_ZEN3))
if(bli_is_double(dt) && (bli_arch_query_id_internal() == BLIS_ARCH_ZEN3))
{
dim_t m_pt = (m/bli_rntm_ways_for( BLIS_MC, rntm ));
dim_t n_pt = (n/bli_rntm_ways_for( BLIS_NC, rntm ));
@@ -217,7 +217,7 @@ err_t bli_gemmsup_int
//Enable packing of B matrix for double data type when dims at per
//thread level are above caches and enable packing of A when transA
//(RRC or CRC storage ids) to avoid rd kernels
if(bli_is_double(dt) && (bli_arch_query_id() == BLIS_ARCH_ZEN3))
if(bli_is_double(dt) && (bli_arch_query_id_internal() == BLIS_ARCH_ZEN3))
{
dim_t m_pt = (m/bli_rntm_ways_for( BLIS_NC, rntm ));
dim_t n_pt = (n/bli_rntm_ways_for( BLIS_MC, rntm ));
@@ -427,7 +427,7 @@ err_t bli_gemmtsup_int
/* Enable packing for A matrix for higher sizes. Note that pack A
* * becomes pack B inside var2m because this is transpose case*/
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
/* Do not pack A for ZEN4 and ZEN5 because the GEMM kernels
* used are column major and GEMMT kernels used are row major.
* Packing matrix A makes matrix B in the GEMMT kernels column

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -61,7 +61,7 @@ void bli_gemm_front
// TODO : As part of future work, we have to retune the entry conditions
// to native(ZEN3/ZEN2/ZEN), and remove the need for dynamic threading
// here (GitHub Issue #114).
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if( bli_obj_is_dcomplex( c ) && ( ( arch_id == BLIS_ARCH_ZEN3 ) ||
( arch_id == BLIS_ARCH_ZEN2 ) || ( arch_id == BLIS_ARCH_ZEN ) ) )
{
@@ -318,7 +318,7 @@ void bli_gemm_front
if ( bli_obj_dt( &c_local ) == bli_obj_dt( &a_local ) &&
bli_obj_dt( &c_local ) == bli_obj_dt( &b_local ) )
{
switch (bli_arch_query_id() )
switch (bli_arch_query_id_internal() )
{
case BLIS_ARCH_ZEN5:
#if defined(BLIS_KERNELS_ZEN5)

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -178,7 +178,7 @@ void bli_gemm_ker_var2
// TODO: Add macro kernel function pointer in cntx
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if
(

View File

@@ -74,7 +74,7 @@ err_t PASTEMAC( ch, tfuncname ) \
return BLIS_FAILURE; \
\
/* Query the architecture ID */ \
arch_t arch_id = bli_arch_query_id(); \
arch_t arch_id = bli_arch_query_id_internal(); \
/* Declaring the object to hold the kernel information */ \
gemmtiny_ukr_info_t gemmtiny_ukr_info; \
/* Variable to flag success/failure of obtaining the kernel */ \
@@ -483,7 +483,7 @@ err_t bli_dgemm_tiny
)
{
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
bool is_mt = bli_thread_get_is_parallel();
{
// Pick the kernel based on the architecture ID

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -73,8 +73,8 @@ static model_t actual_model_id = -1;
// The arch and model ids for the currently running hardware, or the values
// the user specifies to use. We initialize to -1, which will be overwritten
// upon calling bli_arch_set_id().
static arch_t arch_id = -1;
static model_t model_id = -1;
arch_t g_arch_id = -1;
model_t g_model_id = -1;
// Variable used to communicate if user has set '__blis_arch_type_name' between
// bli_arch_set_id() and bli_arch_check_id()
@@ -91,48 +91,25 @@ bool bli_aocl_enable_instruction_query( void )
arch_t bli_arch_query_id( void )
{
bli_arch_check_id_once();
// Simply return the id that was previously cached.
return arch_id;
return bli_arch_query_id_internal();
}
model_t bli_model_query_id( void )
{
bli_arch_check_id_once();
// Simply return the model_id that was previously cached.
return model_id;
return bli_model_query_id_internal();
}
model_t bli_init_model_query_id( void )
{
bli_arch_set_id_once();
// Simply return the model_id that was previously cached.
return model_id;
return bli_init_model_query_id_internal();
}
// -----------------------------------------------------------------------------
// A pthread structure used in pthread_once(). pthread_once() is guaranteed to
// execute exactly once among all threads that pass in this control object.
static bli_pthread_once_t once_id_init = BLIS_PTHREAD_ONCE_INIT;
static bli_pthread_once_t once_id_check = BLIS_PTHREAD_ONCE_INIT;
void bli_arch_set_id_once( void )
{
#ifndef BLIS_CONFIGURETIME_CPUID
bli_pthread_once( &once_id_init, bli_arch_set_id );
#endif
}
void bli_arch_check_id_once( void )
{
#ifndef BLIS_CONFIGURETIME_CPUID
bli_pthread_once( &once_id_check, bli_arch_check_id );
#endif
}
bli_pthread_once_t once_id_init = BLIS_PTHREAD_ONCE_INIT;
bli_pthread_once_t once_id_check = BLIS_PTHREAD_ONCE_INIT;
// -----------------------------------------------------------------------------
@@ -209,7 +186,7 @@ void bli_arch_set_id( void )
// bli_arch_check_id() called later.
// For now, we can only be confident that req_id is in range.
arch_id = req_id;
g_arch_id = req_id;
}
else
#endif
@@ -226,101 +203,19 @@ void bli_arch_set_id( void )
defined BLIS_FAMILY_X86_64 || \
defined BLIS_FAMILY_ARM64 || \
defined BLIS_FAMILY_ARM32
arch_id = actual_arch_id;
#endif
// Intel microarchitectures.
#ifdef BLIS_FAMILY_SKX
arch_id = BLIS_ARCH_SKX;
#endif
#ifdef BLIS_FAMILY_KNL
arch_id = BLIS_ARCH_KNL;
#endif
#ifdef BLIS_FAMILY_KNC
arch_id = BLIS_ARCH_KNC;
#endif
#ifdef BLIS_FAMILY_HASWELL
arch_id = BLIS_ARCH_HASWELL;
#endif
#ifdef BLIS_FAMILY_SANDYBRIDGE
arch_id = BLIS_ARCH_SANDYBRIDGE;
#endif
#ifdef BLIS_FAMILY_PENRYN
arch_id = BLIS_ARCH_PENRYN;
#endif
// AMD microarchitectures.
#ifdef BLIS_FAMILY_ZEN5
arch_id = BLIS_ARCH_ZEN5;
#endif
#ifdef BLIS_FAMILY_ZEN4
arch_id = BLIS_ARCH_ZEN4;
#endif
#ifdef BLIS_FAMILY_ZEN3
arch_id = BLIS_ARCH_ZEN3;
#endif
#ifdef BLIS_FAMILY_ZEN2
arch_id = BLIS_ARCH_ZEN2;
#endif
#ifdef BLIS_FAMILY_ZEN
arch_id = BLIS_ARCH_ZEN;
#endif
#ifdef BLIS_FAMILY_EXCAVATOR
arch_id = BLIS_ARCH_EXCAVATOR;
#endif
#ifdef BLIS_FAMILY_STEAMROLLER
arch_id = BLIS_ARCH_STEAMROLLER;
#endif
#ifdef BLIS_FAMILY_PILEDRIVER
arch_id = BLIS_ARCH_PILEDRIVER;
#endif
#ifdef BLIS_FAMILY_BULLDOZER
arch_id = BLIS_ARCH_BULLDOZER;
#endif
// ARM microarchitectures.
#ifdef BLIS_FAMILY_ARMSVE
arch_id = BLIS_ARCH_ARMSVE;
#endif
#ifdef BLIS_FAMILY_A64FX
arch_id = BLIS_ARCH_A64FX;
#endif
#ifdef BLIS_FAMILY_FIRESTORM
id = BLIS_ARCH_FIRESTORM;
#endif
#ifdef BLIS_FAMILY_THUNDERX2
arch_id = BLIS_ARCH_THUNDERX2;
#endif
#ifdef BLIS_FAMILY_CORTEXA57
arch_id = BLIS_ARCH_CORTEXA57;
#endif
#ifdef BLIS_FAMILY_CORTEXA53
arch_id = BLIS_ARCH_CORTEXA53;
#endif
#ifdef BLIS_FAMILY_CORTEXA15
arch_id = BLIS_ARCH_CORTEXA15;
#endif
#ifdef BLIS_FAMILY_CORTEXA9
arch_id = BLIS_ARCH_CORTEXA9;
#endif
// IBM microarchitectures.
#ifdef BLIS_FAMILY_POWER10
arch_id = BLIS_ARCH_POWER10;
#endif
#ifdef BLIS_FAMILY_POWER9
arch_id = BLIS_ARCH_POWER9;
#endif
#ifdef BLIS_FAMILY_POWER7
arch_id = BLIS_ARCH_POWER7;
#endif
#ifdef BLIS_FAMILY_BGQ
arch_id = BLIS_ARCH_BGQ;
#endif
// Generic microarchitecture.
#ifdef BLIS_FAMILY_GENERIC
arch_id = BLIS_ARCH_GENERIC;
g_arch_id = actual_arch_id;
#else
#ifdef BLIS_FAMILY_TO_ARCH_VALUE
// For single sub-configuration builds, get value from header file
g_arch_id = BLIS_FAMILY_TO_ARCH_VALUE;
#else
// For "auto" build, initialize to generic as starting point.
// It will then determine the correct architecture and set
// BLIS_FAMILY_TO_ARCH_VALUE. This will also be the fallback
// if BLIS_FAMILY_TO_ARCH_VALUE is not set in the relevant
// config header file.
g_arch_id = BLIS_ARCH_GENERIC;
#endif
#endif
}
@@ -336,14 +231,14 @@ void bli_arch_set_id( void )
if ( req_model != -1 )
{
// BLIS_MODEL_TYPE was set. Cautiously check whether its value is usable.
// Assume here that arch_id is valid.
// Assume here that g_arch_id is valid.
// If req_model was set to an invalid model_t value (ie: both outside
// the range appropriate for the given architecture and not default),
// set to default value and continue.
if ( bli_error_checking_is_enabled() )
{
err_t e_val = bli_check_valid_model_id( arch_id, req_model );
err_t e_val = bli_check_valid_model_id( g_arch_id, req_model );
if (e_val != BLIS_SUCCESS)
{
req_model = BLIS_MODEL_DEFAULT;
@@ -354,7 +249,7 @@ void bli_arch_set_id( void )
// We can now be confident that req_model is in range for the
// selected architecture, or it has been reset to be default.
model_id = req_model;
g_model_id = req_model;
}
else
#endif
@@ -363,25 +258,27 @@ void bli_arch_set_id( void )
{
// BLIS_MODEL_TYPE was unset. Proceed with normal subconfiguration
// selection behavior, based on value of architecture id selected
// above. Unlike for arch_id, we cannot simply use actual_model_id
// here, as we need to choose model_id based on the arch_id we are
// above. Unlike for g_arch_id, we cannot simply use actual_model_id
// here, as we need to choose g_model_id based on the g_arch_id we are
// using, which could be different to actual_arch_id.
model_id = bli_cpuid_query_model_id( arch_id );
g_model_id = bli_cpuid_query_model_id( g_arch_id );
}
//printf( "blis_arch_query_id(): arch_id, model_id = %u, %u\n", arch_id, model_id );
//printf( "blis_arch_query_id(): g_arch_id, g_model_id = %u, %u\n", g_arch_id, g_model_id );
//exit(1);
}
void bli_arch_check_id( void )
{
bli_arch_set_id_once();
#ifndef BLIS_CONFIGURETIME_CPUID
bli_pthread_once( &once_id_init, bli_arch_set_id );
#endif
bool arch_not_in_build = FALSE;
bool arch_reset = FALSE;
arch_t orig_arch_id= req_id;
model_t orig_model_id = model_id;
model_t orig_model_id = g_model_id;
// Check arch value against configured options. Only needed
// if user has set it. This function will also do the
@@ -424,7 +321,7 @@ void bli_arch_check_id( void )
arch_not_in_build = TRUE;
arch_reset = TRUE;
req_id = actual_arch_id;
model_id = actual_model_id;
g_model_id = actual_model_id;
}
}
@@ -450,7 +347,7 @@ void bli_arch_check_id( void )
case BLIS_ARCH_HASWELL:
arch_reset = TRUE;
req_id = actual_arch_id;
model_id = actual_model_id;
g_model_id = actual_model_id;
break;
}
}
@@ -469,7 +366,7 @@ void bli_arch_check_id( void )
case BLIS_ARCH_SKX:
arch_reset = TRUE;
req_id = actual_arch_id;
model_id = actual_model_id;
g_model_id = actual_model_id;
break;
}
}
@@ -498,12 +395,12 @@ void bli_arch_check_id( void )
bli_check_error_code( e_val );
}
// If BLIS_ARCH_TYPE (or renamed version of this environment variable)
// was set, we always use this value of req_id to set arch_id.
// was set, we always use this value of req_id to set g_arch_id.
}
// Finally, we can be confident that req_id (1) is in range and (2)
// refers to a context that has been initialized.
arch_id = req_id;
g_arch_id = req_id;
}
#endif
@@ -514,18 +411,18 @@ void bli_arch_check_id( void )
if ( req_id == -1 && aocl_e_i)
{
// AOCL_ENABLE_INSTRUCTIONS was set to an invalid value
// normal system arch_id was used instead.
if ( model_id == BLIS_MODEL_DEFAULT )
// normal system g_arch_id was used instead.
if ( g_model_id == BLIS_MODEL_DEFAULT )
{
fprintf( stderr, "libblis: AOCL_ENABLE_INSTRUCTIONS env var was set to an invalid value.\n"
"libblis: Selecting system default sub-configuration '%s'.\n",
bli_arch_string( arch_id ) );
bli_arch_string( g_arch_id ) );
}
else
{
fprintf( stderr, "libblis: AOCL_ENABLE_INSTRUCTIONS env var was set to an invalid value.\n"
"libblis: Selecting system default sub-configuration '%s', model '%s'.\n",
bli_arch_string( arch_id ), bli_model_string( model_id ) );
bli_arch_string( g_arch_id ), bli_model_string( g_model_id ) );
}
}
else if ( arch_not_in_build )
@@ -534,13 +431,13 @@ void bli_arch_check_id( void )
{
fprintf( stderr, "libblis: Sub-configuration '%s' is not implemented in this build.\n"
"libblis: Selecting system default sub-configuration '%s'.\n",
bli_arch_string( orig_arch_id ), bli_arch_string( arch_id ) );
bli_arch_string( orig_arch_id ), bli_arch_string( g_arch_id ) );
}
else
{
fprintf( stderr, "libblis: Sub-configuration '%s', model '%s' is not implemented in this build.\n"
"libblis: Selecting system default sub-configuration '%s', model '%s'.\n",
bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( arch_id ), bli_model_string( model_id ) );
bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( g_arch_id ), bli_model_string( g_model_id ) );
}
}
else if ( arch_reset )
@@ -549,27 +446,27 @@ void bli_arch_check_id( void )
{
fprintf( stderr, "libblis: Sub-configuration '%s' is not supported on this system.\n"
"libblis: Selecting system default sub-configuration '%s'.\n",
bli_arch_string( orig_arch_id ), bli_arch_string( arch_id ) );
bli_arch_string( orig_arch_id ), bli_arch_string( g_arch_id ) );
}
else
{
fprintf( stderr, "libblis: Sub-configuration '%s', model '%s' is not supported on this system.\n"
"libblis: Selecting system default sub-configuration '%s', model '%s'.\n",
bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( arch_id ), bli_model_string( model_id ) );
bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( g_arch_id ), bli_model_string( g_model_id ) );
}
}
else
{
if ( model_id == BLIS_MODEL_DEFAULT )
if ( g_model_id == BLIS_MODEL_DEFAULT )
{
#ifdef DISABLE_BLIS_ARCH_TYPE
fprintf( stderr, "libblis: Selecting sub-configuration '%s'.\n"
"libblis: User control of sub-configuration using AOCL_ENABLE_INSTRUCTIONS\n"
"libblis: or using "__blis_arch_type_name" and "__blis_model_type_name" is disabled.\n",
bli_arch_string( arch_id ) );
bli_arch_string( g_arch_id ) );
#else
fprintf( stderr, "libblis: Selecting sub-configuration '%s'.\n",
bli_arch_string( arch_id ) );
bli_arch_string( g_arch_id ) );
#endif
}
else
@@ -578,10 +475,10 @@ void bli_arch_check_id( void )
fprintf( stderr, "libblis: Selecting sub-configuration '%s', model '%s'.\n"
"libblis: User control of sub-configuration using AOCL_ENABLE_INSTRUCTIONS\n"
"libblis: or using "__blis_arch_type_name" and "__blis_model_type_name" is disabled.\n",
bli_arch_string( arch_id ), bli_model_string( model_id ) );
bli_arch_string( g_arch_id ), bli_model_string( g_model_id ) );
#else
fprintf( stderr, "libblis: Selecting sub-configuration '%s', model '%s'.\n",
bli_arch_string( arch_id ), bli_model_string( model_id ) );
bli_arch_string( g_arch_id ), bli_model_string( g_model_id ) );
#endif
}
}
@@ -601,7 +498,7 @@ void bli_arch_check_id( void )
#endif
}
//printf( "blis_arch_check_id(): arch_id, model_id = %u, %u\n", arch_id, model_id );
//printf( "blis_arch_check_id(): g_arch_id, g_model_id = %u, %u\n", g_arch_id, g_model_id );
//exit(1);
}

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -35,25 +36,89 @@
#ifndef BLIS_ARCH_H
#define BLIS_ARCH_H
BLIS_EXPORT_BLIS arch_t bli_arch_query_id( void );
BLIS_EXPORT_BLIS bool bli_aocl_enable_instruction_query( void );
void bli_arch_set_id_once( void );
void bli_arch_set_id( void );
BLIS_EXPORT_BLIS arch_t bli_arch_query_id( void );
void bli_arch_check_id_once( void );
void bli_arch_check_id( void );
BLIS_EXPORT_BLIS model_t bli_model_query_id( void );
BLIS_EXPORT_BLIS model_t bli_init_model_query_id( void );
BLIS_EXPORT_BLIS char* bli_arch_string( arch_t id );
BLIS_EXPORT_BLIS char* bli_model_string( model_t id );
extern arch_t g_arch_id;
extern model_t g_model_id;
extern bli_pthread_once_t once_id_check;
extern bli_pthread_once_t once_id_init;
void bli_arch_set_id( void );
void bli_arch_check_id( void );
void bli_arch_set_logging( bool dolog );
bool bli_arch_get_logging( void );
void bli_arch_log( char*, ... );
BLIS_EXPORT_BLIS model_t bli_model_query_id( void );
BLIS_EXPORT_BLIS model_t bli_init_model_query_id( void );
// Return the arch_t id of the sub-configuration in use. Defined in the
// header so that the call can be inlined at its call sites.
//
// - Builds with multiple sub-configurations (any of the BLIS_FAMILY_*
//   families tested below): run bli_arch_check_id() through
//   bli_pthread_once() on the once_id_check guard (skipped when
//   BLIS_CONFIGURETIME_CPUID is defined), then return the global g_arch_id.
// - All other builds: return a value fixed at compile time, namely
//   BLIS_FAMILY_TO_ARCH_VALUE when it is defined, or BLIS_ARCH_GENERIC when
//   only BLIS_CONFIGURETIME_CPUID is defined. If neither is defined,
//   compilation stops with an #error.
BLIS_INLINE arch_t bli_arch_query_id_internal( void )
{
#if defined BLIS_FAMILY_INTEL64 || \
    defined BLIS_FAMILY_AMDZEN || \
    defined BLIS_FAMILY_AMD64_LEGACY || \
    defined BLIS_FAMILY_X86_64 || \
    defined BLIS_FAMILY_ARM64 || \
    defined BLIS_FAMILY_ARM32

	// For builds with multiple sub-configurations use the global value
	// that will reflect dynamic dispatch, subject to any user override
	// via environment variables.
#ifndef BLIS_CONFIGURETIME_CPUID
	bli_pthread_once( &once_id_check, bli_arch_check_id );
#endif

	// Simply return the id that was previously cached.
	return g_arch_id;

#else

#if defined BLIS_FAMILY_TO_ARCH_VALUE
	// For single sub-configuration builds, get value from header file.
	arch_t l_arch_id = BLIS_FAMILY_TO_ARCH_VALUE;
#elif defined BLIS_CONFIGURETIME_CPUID
	// For the "auto" build, report generic as the starting point for use
	// in architecture detection. BLIS will then determine the correct
	// architecture and get the correct BLIS_FAMILY_TO_ARCH_VALUE from
	// the relevant sub-configuration header file.
	arch_t l_arch_id = BLIS_ARCH_GENERIC;
#else
	// No fallback if BLIS_FAMILY_TO_ARCH_VALUE is not set in
	// the relevant config bli_family header file.
#error "BLIS_FAMILY_TO_ARCH_VALUE not defined in relevant config bli_family header file"
#endif

	return l_arch_id;

#endif
}
// Return the global g_model_id. Defined in the header so that the call can
// be inlined at its call sites.
BLIS_INLINE model_t bli_model_query_id_internal( void )
{
#if !defined( BLIS_CONFIGURETIME_CPUID )
	// Outside configure-time CPUID builds, route bli_arch_check_id()
	// through the once_id_check guard before the cached value is read.
	// NOTE(review): presumably bli_pthread_once() has pthread_once()
	// semantics (the routine runs on the first call only) - confirm.
	bli_pthread_once( &once_id_check, bli_arch_check_id );
#endif

	// Hand back whatever model id is currently cached.
	return g_model_id;
}
// Return the global g_model_id, going through the once_id_init guard and
// bli_arch_set_id() rather than the once_id_check guard and
// bli_arch_check_id() used by bli_model_query_id_internal().
BLIS_INLINE model_t bli_init_model_query_id_internal( void )
{
#if !defined( BLIS_CONFIGURETIME_CPUID )
	// Outside configure-time CPUID builds, route bli_arch_set_id()
	// through the once_id_init guard before the cached value is read.
	// NOTE(review): presumably bli_pthread_once() has pthread_once()
	// semantics (the routine runs on the first call only) - confirm.
	bli_pthread_once( &once_id_init, bli_arch_set_id );
#endif

	// Hand back whatever model id is currently cached.
	return g_model_id;
}
#endif

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -519,10 +519,10 @@ cntx_t* bli_gks_query_nat_cntx( void )
// Return the address of the native context for the architecture id
// corresponding to the current hardware, as determined by
// bli_arch_query_id().
// bli_arch_query_id_internal().
// Query the architecture id.
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Use the architecture id to look up a pointer to its context.
cntx_t* cntx = bli_gks_lookup_nat_cntx( arch_id );
@@ -538,7 +538,7 @@ cntx_t* bli_gks_query_cntx_noinit( void )
// does not call bli_init_once().
// Query the architecture id.
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Use the architecture id to look up a pointer to its context.
cntx_t* cntx = bli_gks_lookup_nat_cntx( arch_id );
@@ -566,7 +566,7 @@ cntx_t* bli_gks_query_ind_cntx
// Return the address of a context that will be suited for executing a
// level-3 operation via the requested induced method (and datatype) for
// the architecture id corresponding to the current hardware, as
// determined by bli_arch_query_id().
// determined by bli_arch_query_id_internal().
// This function is called when a level-3 operation via induced method is
// called, e.g. bli_gemm1m(). If this is the first time that induced method
@@ -577,7 +577,7 @@ cntx_t* bli_gks_query_ind_cntx
// ensure thread safety and deterministic behavior.
// Query the architecture id.
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Sanity check: verify that the arch_t id is valid.
if ( bli_error_checking_is_enabled() )
@@ -658,7 +658,7 @@ void bli_gks_init_ref_cntx
)
{
// Query the architecture id.
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Sanity check: verify that the arch_t id is valid.
if ( bli_error_checking_is_enabled() )
@@ -687,7 +687,7 @@ bool bli_gks_cntx_l3_nat_ukr_is_ref
cntx_t ref_cntx;
// Initialize a context with reference kernels for the arch_t id queried
// via bli_arch_query_id().
// via bli_arch_query_id_internal().
bli_gks_init_ref_cntx( &ref_cntx );
// Query each context for the micro-kernel function pointer for the
@@ -779,7 +779,7 @@ kimpl_t bli_gks_l3_ukr_impl_type( l3ukr_t ukr, ind_t method, num_t dt )
cntx_t ref_cntx_l;
// Query the architecture id.
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Sanity check: verify that the arch_t id is valid.
if ( bli_error_checking_is_enabled() )

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2021 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2021 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -557,7 +557,7 @@ void bli_nthreads_optimum(
dim_t k = bli_obj_width_after_trans(a);
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if(arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
{
@@ -1209,7 +1209,7 @@ void bli_nthreads_optimum(
dim_t k = bli_obj_width_after_trans(a);
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if( arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4 )
{
@@ -1643,7 +1643,7 @@ void bli_nthreads_optimum(
dim_t k = bli_obj_width_after_trans(a);
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if( arch_id == BLIS_ARCH_ZEN5 )
{
@@ -2255,7 +2255,7 @@ void bli_nthreads_optimum(
dim_t n = bli_obj_width(c);
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if (arch_id == BLIS_ARCH_ZEN5)
{

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -303,7 +303,7 @@ f77_int idamax_blis_impl
cntx_t* cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -200,7 +200,7 @@ void saxpy_blis_impl
cntx_t *cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API
@@ -341,7 +341,7 @@ void daxpy_blis_impl
cntx_t *cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API
@@ -724,7 +724,7 @@ void zaxpy_blis_impl
cntx_t *cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -168,7 +168,7 @@ void scopy_blis_impl
cntx_t *cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API
@@ -290,7 +290,7 @@ void dcopy_blis_impl
cntx_t *cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API
@@ -552,7 +552,7 @@ void zcopy_blis_impl
cntx_t *cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -189,7 +189,7 @@ float sdot_blis_impl
cntx_t *cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API
@@ -334,7 +334,7 @@ double ddot_blis_impl
cntx_t *cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API
@@ -817,7 +817,7 @@ dcomplex zdotu_blis_impl
cntx_t *cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API
@@ -1247,7 +1247,7 @@ dcomplex zdotc_blis_impl
cntx_t *cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -42,7 +42,7 @@
#define GEMM_BLIS_IMPL(ch, blasname) \
PASTEF77S(ch,blasname) ( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \
arch_t arch_id = bli_arch_query_id(); \
arch_t arch_id = bli_arch_query_id_internal(); \
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
{ \
bli_zero_zmm(); \
@@ -684,7 +684,7 @@ void dgemm_blis_impl
err_t k1_status = BLIS_FAILURE;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if ( arch_id == BLIS_ARCH_ZEN || arch_id == BLIS_ARCH_ZEN2 ||
arch_id == BLIS_ARCH_ZEN3 )
@@ -821,7 +821,7 @@ void dgemm_blis_impl
bool entry_to_small = false;
/* AVX512 GEMM tiny path is performant enough to handle small skinny inputs on ZEN4/5 */
/* AVX2 gemm_small path is invoked on ZEN/2/3 only */
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if( arch_id == BLIS_ARCH_ZEN3 || arch_id == BLIS_ARCH_ZEN2 || arch_id == BLIS_ARCH_ZEN )
{
@@ -929,7 +929,7 @@ void dgemm_
{
dgemm_blis_impl(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
#if defined(BLIS_KERNELS_ZEN4)
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
{
bli_zero_zmm();
@@ -1179,7 +1179,7 @@ void zgemm_blis_impl
err_t k1_status = BLIS_FAILURE;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if ( arch_id == BLIS_ARCH_ZEN || arch_id == BLIS_ARCH_ZEN2 ||
arch_id == BLIS_ARCH_ZEN3 )
@@ -1335,7 +1335,7 @@ void zgemm_blis_impl
#endif
/* Query the architecture ID */
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
/* Boolean to track the entry to small path */
bool entry_to_small = false;
@@ -1491,7 +1491,7 @@ void zgemm_
{
zgemm_blis_impl(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
#if defined(BLIS_KERNELS_ZEN4)
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
{
bli_zero_zmm();
@@ -1741,7 +1741,7 @@ void cgemm_blis_impl
if( ( k0 == 1 ) && bli_is_notrans( blis_transa ) && bli_is_notrans( blis_transb ) )
{
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if ( ( arch_id == BLIS_ARCH_ZEN4 ) || ( arch_id == BLIS_ARCH_ZEN5 ) )
{
@@ -1887,7 +1887,7 @@ void cgemm_
{
cgemm_blis_impl(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
#if defined(BLIS_KERNELS_ZEN4)
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
{
bli_zero_zmm();
@@ -2170,7 +2170,7 @@ void sgemm_
{
sgemm_blis_impl(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
#if defined(BLIS_KERNELS_ZEN4)
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
{
bli_zero_zmm();
@@ -2340,7 +2340,7 @@ void dzgemm_
{
dzgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc );
#if defined(BLIS_KERNELS_ZEN4)
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
{
bli_zero_zmm();

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -44,7 +44,7 @@
#define HEMM_BLIS_IMPL(ch, blasname) \
PASTEF77S(ch,blasname) ( side, uploa, m, n, alpha, a, lda, b, ldb, beta, c, ldc ); \
arch_t arch_id = bli_arch_query_id(); \
arch_t arch_id = bli_arch_query_id_internal(); \
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
{ \
bli_zero_zmm(); \

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -44,7 +44,7 @@
#define HER2K_BLIS_IMPL(ch, blasname) \
PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, b, ldb, beta, c, ldc ); \
arch_t arch_id = bli_arch_query_id(); \
arch_t arch_id = bli_arch_query_id_internal(); \
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
{ \
bli_zero_zmm(); \

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -44,7 +44,7 @@
#define HERK_BLIS_IMPL(ch, blasname) \
PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, beta, c, ldc ); \
arch_t arch_id = bli_arch_query_id(); \
arch_t arch_id = bli_arch_query_id_internal(); \
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
{ \
bli_zero_zmm(); \

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -157,7 +157,7 @@ void sscal_blis_impl
cntx_t *cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API
@@ -252,7 +252,7 @@ void dscal_blis_impl
cntx_t *cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API
@@ -455,7 +455,7 @@ void zdscal_blis_impl
cntx_t *cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API
@@ -626,7 +626,7 @@ void cscal_blis_impl
cntx_t* cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API
@@ -721,7 +721,7 @@ void zscal_blis_impl
cntx_t* cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -44,7 +44,7 @@
#define SYMM_BLIS_IMPL(ch, blasname) \
PASTEF77S(ch,blasname) ( side, uploa, m, n, alpha, a, lda, b, ldb, beta, c, ldc ); \
arch_t arch_id = bli_arch_query_id(); \
arch_t arch_id = bli_arch_query_id_internal(); \
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
{ \
bli_zero_zmm(); \

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin.
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -44,7 +44,7 @@
#define SYR2K_BLIS_IMPL(ch, blasname) \
PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, b, ldb, beta, c, ldc ); \
arch_t arch_id = bli_arch_query_id(); \
arch_t arch_id = bli_arch_query_id_internal(); \
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
{ \
bli_zero_zmm(); \

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin.
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -44,7 +44,7 @@
#define SYRK_BLIS_IMPL(ch, blasname) \
PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, beta, c, ldc ); \
arch_t arch_id = bli_arch_query_id(); \
arch_t arch_id = bli_arch_query_id_internal(); \
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
{ \
bli_zero_zmm(); \

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin.
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -43,7 +43,7 @@
#define TRMM_BLIS_IMPL(ch, blasname) \
PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \
arch_t arch_id = bli_arch_query_id(); \
arch_t arch_id = bli_arch_query_id_internal(); \
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
{ \
bli_zero_zmm(); \

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -44,7 +44,7 @@
#define TRSM_BLIS_IMPL(ch, blasname) \
PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \
arch_t arch_id = bli_arch_query_id(); \
arch_t arch_id = bli_arch_query_id_internal(); \
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
{ \
bli_zero_zmm(); \
@@ -847,7 +847,7 @@ void strsm_
{
strsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb );
#if defined(BLIS_KERNELS_ZEN4)
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
{
bli_zero_zmm();
@@ -1139,7 +1139,7 @@ void dtrsm_blis_impl
dtrsm_small_ker_ft trsm_ker_ptr = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// dimensions of triangular matrix
// for left variants, dim_a is m0,
@@ -1351,7 +1351,7 @@ void dtrsm_
{
dtrsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb );
#if defined(BLIS_KERNELS_ZEN4)
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
{
bli_zero_zmm();
@@ -1706,7 +1706,7 @@ void ztrsm_blis_impl
ztrsm_small_ker_ft trsm_ker_ptr = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
bool is_parallel = bli_thread_get_is_parallel();
@@ -1895,7 +1895,7 @@ void ztrsm_
{
ztrsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb );
#if defined(BLIS_KERNELS_ZEN4)
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
{
bli_zero_zmm();
@@ -2306,7 +2306,7 @@ void ctrsm_
{
ctrsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb );
#if defined(BLIS_KERNELS_ZEN4)
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
{
bli_zero_zmm();

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2024 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2024 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -176,7 +176,7 @@ void saxpby_blis_impl
cntx_t *cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API
@@ -308,7 +308,7 @@ void daxpby_blis_impl
cntx_t *cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API
@@ -445,7 +445,7 @@ void caxpby_blis_impl
cntx_t *cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API
@@ -577,7 +577,7 @@ void zaxpby_blis_impl
cntx_t *cntx = NULL;
// Query the architecture ID
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
// Function pointer declaration for the function
// that will be used by this API

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -42,7 +42,7 @@
#define GEMMT_BLIS_IMPL(ch, blasname) \
PASTEF77S(ch,blasname) ( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \
arch_t arch_id = bli_arch_query_id(); \
arch_t arch_id = bli_arch_query_id_internal(); \
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
{ \
bli_zero_zmm(); \

View File

@@ -5,7 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -322,7 +322,7 @@ void bli_cnormfv_unb_var1
inc_t incx_buf = incx;
// Querying the architecture ID to deploy the appropriate kernel
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
switch ( arch_id )
{
case BLIS_ARCH_ZEN5:
@@ -460,7 +460,7 @@ void bli_znormfv_unb_var1
dim_t simd_factor = 1;
#endif
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
switch ( arch_id )
{
case BLIS_ARCH_ZEN5:
@@ -939,7 +939,7 @@ void bli_snormfv_unb_var1
inc_t incx_buf = incx;
// Querying the architecture ID to deploy the appropriate kernel
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
switch ( arch_id )
{
case BLIS_ARCH_ZEN5:
@@ -1084,7 +1084,7 @@ void bli_dnormfv_unb_var1
dim_t nt_ideal = -1;
#endif
arch_t arch_id = bli_arch_query_id();
arch_t arch_id = bli_arch_query_id_internal();
switch ( arch_id )
{
case BLIS_ARCH_ZEN5:

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2024 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2024 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -377,7 +377,7 @@ void bli_daxpyf_zen4_int_32_mt
BLIS_AXPYF_KER,
BLIS_DOUBLE,
BLIS_DOUBLE,
bli_arch_query_id(),
bli_arch_query_id_internal(),
m,
&nt
);

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2025-26, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2025 - 2026, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -567,7 +567,7 @@ GENT_GEMV_CALLER(double, d, 40, 2, n, st);
BLIS_GEMV_KER, \
PASTEMAC(ch,type), \
BLIS_NO_TRANSPOSE, \
bli_arch_query_id(), \
bli_arch_query_id_internal(), \
m, \
n, \
&nt \
@@ -1034,7 +1034,7 @@ GENT_GEMV_CALLER(double, d, 40, 8, m);
BLIS_GEMV_KER,
BLIS_DOUBLE, //PASTEMAC(d,type),
BLIS_NO_TRANSPOSE,
bli_arch_query_id(),
bli_arch_query_id_internal(),
m,
n,
&nt
@@ -1190,7 +1190,7 @@ void bli_dgemv_m_zen4_int_40x8_mt_Mdiv
BLIS_GEMV_KER,
PASTEMAC(d,type),
BLIS_NO_TRANSPOSE,
bli_arch_query_id(),
bli_arch_query_id_internal(),
m,
n,
&nt
@@ -1313,7 +1313,7 @@ void bli_dgemv_m_zen4_int_40x8_mt_Ndiv
BLIS_GEMV_KER,
PASTEMAC(d,type),
BLIS_NO_TRANSPOSE,
bli_arch_query_id(),
bli_arch_query_id_internal(),
m,
n,
&nt