mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Optimize bli_arch_query_id and related functions
bli_arch_query_id() is used to select kernels in optimized BLAS APIs. The previous implementation incurred the overhead of multiple function calls. This overhead has been reduced by:
- Defining the function in a header file so that it can be inlined.
- Avoiding the call to bli_arch_check_id_once, which was a wrapper for a call to bli_pthread_once. Instead, bli_pthread_once is called directly.
- For builds with a single BLIS sub-configuration, taking the correct arch_id directly from a header file in the corresponding config subdirectory. This avoids the bli_pthread_once call and makes the value explicit at compile time, which may enable additional optimizations.
To enable these changes, the variables arch_id and model_id defined in frame/base/bli_arch.c are no longer static, as they must be accessed from multiple files (i.e. they are now global variables). They have been renamed to g_arch_id and g_model_id to distinguish them from any locally defined arch_id or model_id variables.
This commit is contained in:
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -235,7 +235,7 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16)
|
||||
* as the tiny path for BF16->FP32 is not available. Hence the arch_id also has to be
|
||||
* verified here.
|
||||
*/
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
if( ( bli_cpuid_is_avx512bf16_supported() == TRUE ) && ( ( arch_id == BLIS_ARCH_ZEN4 ) || ( arch_id == BLIS_ARCH_ZEN5 ) ) && ( is_single_thread( &rntm_g ) == TRUE) )
|
||||
{
|
||||
if( ( is_row_major == TRUE ) &&
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -240,7 +240,7 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32)
|
||||
* as the tiny path for BF16->FP32 is not available. Hence the arch_id also has to be
|
||||
* verified here.
|
||||
*/
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
if( ( bli_cpuid_is_avx512bf16_supported() == TRUE ) && ( ( arch_id == BLIS_ARCH_ZEN4 ) || ( arch_id == BLIS_ARCH_ZEN5 ) ) &&
|
||||
( is_tiny_input_bf16of32( m, n, k, lcntx_g ) == TRUE ) &&
|
||||
( is_single_thread( &rntm_g ) == TRUE) &&
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -102,7 +102,7 @@ static bli_pthread_once_t once_check_lpgemm_func_map_init = BLIS_PTHREAD_ONCE_IN
|
||||
|
||||
static void _lpgemm_init_enable_arch()
|
||||
{
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
bool enbl_instr = bli_aocl_enable_instruction_query();
|
||||
|
||||
if ( ( enbl_instr == TRUE ) &&
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_A64FX
|
||||
|
||||
// -- MEMORY ALLOCATION --------------------------------------------------------
|
||||
|
||||
|
||||
@@ -35,7 +35,6 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
|
||||
// -- MEMORY ALLOCATION --------------------------------------------------------
|
||||
|
||||
#define BLIS_SIMD_ALIGN_SIZE 16
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ARMSVE
|
||||
|
||||
// -- MEMORY ALLOCATION --------------------------------------------------------
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_BGQ
|
||||
|
||||
#undef restrict
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_BULLDOZER
|
||||
|
||||
|
||||
#if 0
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_CORTEXA15
|
||||
|
||||
// -- MEMORY ALLOCATION --------------------------------------------------------
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -32,6 +33,7 @@
|
||||
|
||||
*/
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_CORTEXA53
|
||||
|
||||
// -- MEMORY ALLOCATION --------------------------------------------------------
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_CORTEXA57
|
||||
|
||||
// -- MEMORY ALLOCATION --------------------------------------------------------
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_CORTEXA9
|
||||
|
||||
// -- MEMORY ALLOCATION --------------------------------------------------------
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_EXCAVATOR
|
||||
|
||||
// -- MEMORY ALLOCATION --------------------------------------------------------
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_FIRESTORM
|
||||
|
||||
// -- MEMORY ALLOCATION --------------------------------------------------------
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2025 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,8 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_GENERIC
|
||||
|
||||
//#endif
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019 - 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -36,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_HASWELL
|
||||
|
||||
#if 0
|
||||
// -- LEVEL-3 MICRO-KERNEL CONSTANTS AND DEFINITIONS ---------------------------
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_KNC
|
||||
|
||||
// -- THREADING PARAMETERS -----------------------------------------------------
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_KNL
|
||||
|
||||
// -- THREADING PARAMETERS -----------------------------------------------------
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_PENRYN
|
||||
|
||||
// -- MEMORY ALLOCATION --------------------------------------------------------
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_PILEDRIVER
|
||||
|
||||
// -- MEMORY ALLOCATION --------------------------------------------------------
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2019, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -32,6 +33,8 @@
|
||||
|
||||
*/
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_POWER10
|
||||
|
||||
#define BLIS_POOL_ADDR_ALIGN_SIZE_A 4096
|
||||
#define BLIS_POOL_ADDR_ALIGN_SIZE_B 4096
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_POWER7
|
||||
|
||||
#if 0
|
||||
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2019, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -32,6 +33,8 @@
|
||||
|
||||
*/
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_POWER9
|
||||
|
||||
#define BLIS_POOL_ADDR_ALIGN_SIZE_A 4096
|
||||
#define BLIS_POOL_ADDR_ALIGN_SIZE_B 4096
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_SANDYBRIDGE
|
||||
|
||||
#if 0
|
||||
// -- LEVEL-3 MICRO-KERNEL CONSTANTS AND DEFINITIONS ---------------------------
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,8 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_SKX
|
||||
|
||||
// -- THREADING PARAMETERS -----------------------------------------------------
|
||||
|
||||
#define BLIS_THREAD_RATIO_M 3
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_STEAMROLLER
|
||||
|
||||
// -- MEMORY ALLOCATION --------------------------------------------------------
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,6 +36,7 @@
|
||||
//#ifndef BLIS_FAMILY_H
|
||||
//#define BLIS_FAMILY_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_THUNDERX2
|
||||
|
||||
// -- MEMORY ALLOCATION --------------------------------------------------------
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -36,6 +36,8 @@
|
||||
#ifndef BLIS_FAMILY_ZEN_H
|
||||
#define BLIS_FAMILY_ZEN_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ZEN
|
||||
|
||||
#include "bli_config_zen.h"
|
||||
|
||||
// By default, it is effective to parallelize the outer loops.
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -33,8 +33,10 @@
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLI_FAMILY_ZEN2_
|
||||
#define BLI_FAMILY_ZEN2_
|
||||
#ifndef BLIS_FAMILY_ZEN2_H
|
||||
#define BLIS_FAMILY_ZEN2_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ZEN2
|
||||
|
||||
// By default, it is effective to parallelize the outer loops.
|
||||
// Setting these macros to 1 will force JR and IR inner loops
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -33,8 +33,10 @@
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLI_FAMILY_ZEN3_
|
||||
#define BLI_FAMILY_ZEN3_
|
||||
#ifndef BLIS_FAMILY_ZEN3_H
|
||||
#define BLIS_FAMILY_ZEN3_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ZEN3
|
||||
|
||||
// By default, it is effective to parallelize the outer loops.
|
||||
// Setting these macros to 1 will force JR and IR inner loops
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2021 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2021 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -32,8 +32,10 @@
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLI_FAMILY_ZEN4_
|
||||
#define BLI_FAMILY_ZEN4_
|
||||
#ifndef BLIS_FAMILY_ZEN4_H
|
||||
#define BLIS_FAMILY_ZEN4_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ZEN4
|
||||
|
||||
#include "bli_config_zen4.h"
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2024 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2024 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -32,8 +32,10 @@
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLI_FAMILY_ZEN5_
|
||||
#define BLI_FAMILY_ZEN5_
|
||||
#ifndef BLIS_FAMILY_ZEN5_H
|
||||
#define BLIS_FAMILY_ZEN5_H
|
||||
|
||||
#define BLIS_FAMILY_TO_ARCH_VALUE BLIS_ARCH_ZEN5
|
||||
|
||||
#include "bli_config_zen5.h"
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -187,7 +187,7 @@ void bli_packm_blk_var1
|
||||
// For DGEMM in AVX512, scale by alpha during packing
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
if
|
||||
(
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -292,7 +292,7 @@ void bli_dgemv_unf_var1
|
||||
the support of AVX512 or AVX2, if AVX512 - arch_id will be zen4
|
||||
and zen5 or for AVX2 it will be zen3.
|
||||
*/
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
#if defined(BLIS_ENABLE_OPENMP) && defined(AOCL_DYNAMIC)
|
||||
// Setting the threshold to invoke the fast-path
|
||||
@@ -892,7 +892,7 @@ void bli_zgemv_unf_var1
|
||||
the support of AVX512 or AVX2, if AVX512 - arch_id will be zen4
|
||||
or for AVX2 it will be zen3.
|
||||
*/
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
switch ( arch_id )
|
||||
{
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -271,7 +271,7 @@ void bli_dgemv_unf_var2 (
|
||||
the support of AVX512 or AVX2, if AVX512 - arch_id will be zen4
|
||||
or for AVX2 it will be zen3.
|
||||
*/
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// b_fuse stores the fusing factor for AXPYF kernel.
|
||||
dim_t b_fuse;
|
||||
@@ -707,7 +707,7 @@ void bli_zgemv_unf_var2
|
||||
conja = bli_extract_conj(transa);
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
/*
|
||||
Function pointer declaration for the functions
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -304,7 +304,7 @@ void bli_dtrsv_unf_var1
|
||||
if (bli_cpuid_is_avx2fma3_supported() == TRUE) {
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
switch ( arch_id )
|
||||
{
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -310,7 +310,7 @@ void bli_dtrsv_unf_var2
|
||||
#if defined(BLIS_KERNELS_ZEN4)
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4 )
|
||||
{
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -242,7 +242,7 @@ static err_t bli_gemm_ic_jc_optimum_sup_arch_dispatcher
|
||||
err_t ret_val = BLIS_FAILURE;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
if ( arch_id == BLIS_ARCH_ZEN3 )
|
||||
{
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -109,7 +109,7 @@ err_t bli_gemmsup
|
||||
#if defined(BLIS_FAMILY_ZEN5) || defined(BLIS_FAMILY_ZEN4) || defined(BLIS_FAMILY_AMDZEN) || defined(BLIS_FAMILY_X86_64)
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
if(( arch_id == BLIS_ARCH_ZEN5 ) || ( arch_id == BLIS_ARCH_ZEN4 ))
|
||||
{
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -139,7 +139,7 @@ err_t bli_gemmsup_int
|
||||
//Enable packing of B matrix for double data type when dims at per
|
||||
//thread level are above caches and enable packing of A when transA
|
||||
//(RRC or CRC storage ids) to avoid rd kernels
|
||||
if(bli_is_double(dt) && (bli_arch_query_id() == BLIS_ARCH_ZEN3))
|
||||
if(bli_is_double(dt) && (bli_arch_query_id_internal() == BLIS_ARCH_ZEN3))
|
||||
{
|
||||
dim_t m_pt = (m/bli_rntm_ways_for( BLIS_MC, rntm ));
|
||||
dim_t n_pt = (n/bli_rntm_ways_for( BLIS_NC, rntm ));
|
||||
@@ -217,7 +217,7 @@ err_t bli_gemmsup_int
|
||||
//Enable packing of B matrix for double data type when dims at per
|
||||
//thread level are above caches and enable packing of A when transA
|
||||
//(RRC or CRC storage ids) to avoid rd kernels
|
||||
if(bli_is_double(dt) && (bli_arch_query_id() == BLIS_ARCH_ZEN3))
|
||||
if(bli_is_double(dt) && (bli_arch_query_id_internal() == BLIS_ARCH_ZEN3))
|
||||
{
|
||||
dim_t m_pt = (m/bli_rntm_ways_for( BLIS_NC, rntm ));
|
||||
dim_t n_pt = (n/bli_rntm_ways_for( BLIS_MC, rntm ));
|
||||
@@ -427,7 +427,7 @@ err_t bli_gemmtsup_int
|
||||
|
||||
/* Enable packing for A matrix for higher sizes. Note that pack A
|
||||
* * becomes pack B inside var2m because this is transpose case*/
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
/* Do not pack A for ZEN4 and ZEN5 because the GEMM kernels
|
||||
* used are column major and GEMMT kernels used are row major.
|
||||
* Packing matrix A makes matrix B in the GEMMT kernels column
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -61,7 +61,7 @@ void bli_gemm_front
|
||||
// TODO : As part of future work, we have to retune the entry conditions
|
||||
// to native(ZEN3/ZEN2/ZEN), and remove the need for dynamic threading
|
||||
// here (GitHub Issue #114).
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
if( bli_obj_is_dcomplex( c ) && ( ( arch_id == BLIS_ARCH_ZEN3 ) ||
|
||||
( arch_id == BLIS_ARCH_ZEN2 ) || ( arch_id == BLIS_ARCH_ZEN ) ) )
|
||||
{
|
||||
@@ -318,7 +318,7 @@ void bli_gemm_front
|
||||
if ( bli_obj_dt( &c_local ) == bli_obj_dt( &a_local ) &&
|
||||
bli_obj_dt( &c_local ) == bli_obj_dt( &b_local ) )
|
||||
{
|
||||
switch (bli_arch_query_id() )
|
||||
switch (bli_arch_query_id_internal() )
|
||||
{
|
||||
case BLIS_ARCH_ZEN5:
|
||||
#if defined(BLIS_KERNELS_ZEN5)
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -178,7 +178,7 @@ void bli_gemm_ker_var2
|
||||
// TODO: Add macro kernel function pointer in cntx
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
if
|
||||
(
|
||||
|
||||
@@ -74,7 +74,7 @@ err_t PASTEMAC( ch, tfuncname ) \
|
||||
return BLIS_FAILURE; \
|
||||
\
|
||||
/* Query the architecture ID */ \
|
||||
arch_t arch_id = bli_arch_query_id(); \
|
||||
arch_t arch_id = bli_arch_query_id_internal(); \
|
||||
/* Declaring the object to hold the kernel information */ \
|
||||
gemmtiny_ukr_info_t gemmtiny_ukr_info; \
|
||||
/* Variable to flag success/failure of obtaining the kernel */ \
|
||||
@@ -483,7 +483,7 @@ err_t bli_dgemm_tiny
|
||||
)
|
||||
{
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
bool is_mt = bli_thread_get_is_parallel();
|
||||
{
|
||||
// Pick the kernel based on the architecture ID
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -73,8 +73,8 @@ static model_t actual_model_id = -1;
|
||||
// The arch and model ids for the currently running hardware, or the values
|
||||
// the user specifies to use. We initialize to -1, which will be overwritten
|
||||
// upon calling bli_arch_set_id().
|
||||
static arch_t arch_id = -1;
|
||||
static model_t model_id = -1;
|
||||
arch_t g_arch_id = -1;
|
||||
model_t g_model_id = -1;
|
||||
|
||||
// Variable used to communicate if user has set '__blis_arch_type_name' between
|
||||
// bli_arch_set_id() and bli_arch_check_id()
|
||||
@@ -91,48 +91,25 @@ bool bli_aocl_enable_instruction_query( void )
|
||||
|
||||
arch_t bli_arch_query_id( void )
|
||||
{
|
||||
bli_arch_check_id_once();
|
||||
|
||||
// Simply return the id that was previously cached.
|
||||
return arch_id;
|
||||
return bli_arch_query_id_internal();
|
||||
}
|
||||
|
||||
model_t bli_model_query_id( void )
|
||||
{
|
||||
bli_arch_check_id_once();
|
||||
|
||||
// Simply return the model_id that was previously cached.
|
||||
return model_id;
|
||||
return bli_model_query_id_internal();
|
||||
}
|
||||
|
||||
model_t bli_init_model_query_id( void )
|
||||
{
|
||||
bli_arch_set_id_once();
|
||||
|
||||
// Simply return the model_id that was previously cached.
|
||||
return model_id;
|
||||
return bli_init_model_query_id_internal();
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// A pthread structure used in pthread_once(). pthread_once() is guaranteed to
|
||||
// execute exactly once among all threads that pass in this control object.
|
||||
static bli_pthread_once_t once_id_init = BLIS_PTHREAD_ONCE_INIT;
|
||||
static bli_pthread_once_t once_id_check = BLIS_PTHREAD_ONCE_INIT;
|
||||
|
||||
void bli_arch_set_id_once( void )
|
||||
{
|
||||
#ifndef BLIS_CONFIGURETIME_CPUID
|
||||
bli_pthread_once( &once_id_init, bli_arch_set_id );
|
||||
#endif
|
||||
}
|
||||
|
||||
void bli_arch_check_id_once( void )
|
||||
{
|
||||
#ifndef BLIS_CONFIGURETIME_CPUID
|
||||
bli_pthread_once( &once_id_check, bli_arch_check_id );
|
||||
#endif
|
||||
}
|
||||
bli_pthread_once_t once_id_init = BLIS_PTHREAD_ONCE_INIT;
|
||||
bli_pthread_once_t once_id_check = BLIS_PTHREAD_ONCE_INIT;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
@@ -209,7 +186,7 @@ void bli_arch_set_id( void )
|
||||
// bli_arch_check_id() called later.
|
||||
|
||||
// For now, we can only be confident that req_id is in range.
|
||||
arch_id = req_id;
|
||||
g_arch_id = req_id;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@@ -226,101 +203,19 @@ void bli_arch_set_id( void )
|
||||
defined BLIS_FAMILY_X86_64 || \
|
||||
defined BLIS_FAMILY_ARM64 || \
|
||||
defined BLIS_FAMILY_ARM32
|
||||
arch_id = actual_arch_id;
|
||||
#endif
|
||||
|
||||
// Intel microarchitectures.
|
||||
#ifdef BLIS_FAMILY_SKX
|
||||
arch_id = BLIS_ARCH_SKX;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_KNL
|
||||
arch_id = BLIS_ARCH_KNL;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_KNC
|
||||
arch_id = BLIS_ARCH_KNC;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_HASWELL
|
||||
arch_id = BLIS_ARCH_HASWELL;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_SANDYBRIDGE
|
||||
arch_id = BLIS_ARCH_SANDYBRIDGE;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_PENRYN
|
||||
arch_id = BLIS_ARCH_PENRYN;
|
||||
#endif
|
||||
|
||||
// AMD microarchitectures.
|
||||
#ifdef BLIS_FAMILY_ZEN5
|
||||
arch_id = BLIS_ARCH_ZEN5;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_ZEN4
|
||||
arch_id = BLIS_ARCH_ZEN4;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_ZEN3
|
||||
arch_id = BLIS_ARCH_ZEN3;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_ZEN2
|
||||
arch_id = BLIS_ARCH_ZEN2;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_ZEN
|
||||
arch_id = BLIS_ARCH_ZEN;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_EXCAVATOR
|
||||
arch_id = BLIS_ARCH_EXCAVATOR;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_STEAMROLLER
|
||||
arch_id = BLIS_ARCH_STEAMROLLER;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_PILEDRIVER
|
||||
arch_id = BLIS_ARCH_PILEDRIVER;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_BULLDOZER
|
||||
arch_id = BLIS_ARCH_BULLDOZER;
|
||||
#endif
|
||||
|
||||
// ARM microarchitectures.
|
||||
#ifdef BLIS_FAMILY_ARMSVE
|
||||
arch_id = BLIS_ARCH_ARMSVE;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_A64FX
|
||||
arch_id = BLIS_ARCH_A64FX;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_FIRESTORM
|
||||
id = BLIS_ARCH_FIRESTORM;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_THUNDERX2
|
||||
arch_id = BLIS_ARCH_THUNDERX2;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_CORTEXA57
|
||||
arch_id = BLIS_ARCH_CORTEXA57;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_CORTEXA53
|
||||
arch_id = BLIS_ARCH_CORTEXA53;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_CORTEXA15
|
||||
arch_id = BLIS_ARCH_CORTEXA15;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_CORTEXA9
|
||||
arch_id = BLIS_ARCH_CORTEXA9;
|
||||
#endif
|
||||
|
||||
// IBM microarchitectures.
|
||||
#ifdef BLIS_FAMILY_POWER10
|
||||
arch_id = BLIS_ARCH_POWER10;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_POWER9
|
||||
arch_id = BLIS_ARCH_POWER9;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_POWER7
|
||||
arch_id = BLIS_ARCH_POWER7;
|
||||
#endif
|
||||
#ifdef BLIS_FAMILY_BGQ
|
||||
arch_id = BLIS_ARCH_BGQ;
|
||||
#endif
|
||||
|
||||
// Generic microarchitecture.
|
||||
#ifdef BLIS_FAMILY_GENERIC
|
||||
arch_id = BLIS_ARCH_GENERIC;
|
||||
g_arch_id = actual_arch_id;
|
||||
#else
|
||||
#ifdef BLIS_FAMILY_TO_ARCH_VALUE
|
||||
// For single sub-configuration builds, get value from header file
|
||||
g_arch_id = BLIS_FAMILY_TO_ARCH_VALUE;
|
||||
#else
|
||||
// For "auto" build, initialize to generic as starting point.
|
||||
// BLIS will then determine the correct architecture and set
|
||||
// BLIS_FAMILY_TO_ARCH_VALUE. This will also be the fallback
|
||||
// if BLIS_FAMILY_TO_ARCH_VALUE is not set in the relevant
|
||||
// config header file.
|
||||
g_arch_id = BLIS_ARCH_GENERIC;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -336,14 +231,14 @@ void bli_arch_set_id( void )
|
||||
if ( req_model != -1 )
|
||||
{
|
||||
// BLIS_MODEL_TYPE was set. Cautiously check whether its value is usable.
|
||||
// Assume here that arch_id is valid.
|
||||
// Assume here that g_arch_id is valid.
|
||||
|
||||
// If req_model was set to an invalid model_t value (ie: both outside
|
||||
// the range appropriate for the given architecture and not default),
|
||||
// set to default value and continue.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
{
|
||||
err_t e_val = bli_check_valid_model_id( arch_id, req_model );
|
||||
err_t e_val = bli_check_valid_model_id( g_arch_id, req_model );
|
||||
if (e_val != BLIS_SUCCESS)
|
||||
{
|
||||
req_model = BLIS_MODEL_DEFAULT;
|
||||
@@ -354,7 +249,7 @@ void bli_arch_set_id( void )
|
||||
|
||||
// We can now be confident that req_model is in range for the
|
||||
// selected architecture, or it has been reset to be default.
|
||||
model_id = req_model;
|
||||
g_model_id = req_model;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
@@ -363,25 +258,27 @@ void bli_arch_set_id( void )
|
||||
{
|
||||
// BLIS_MODEL_TYPE was unset. Proceed with normal subconfiguration
|
||||
// selection behavior, based on value of architecture id selected
|
||||
// above. Unlike for arch_id, we cannot simply use actual_model_id
|
||||
// here, as we need to choose model_id based on the arch_id we are
|
||||
// above. Unlike for g_arch_id, we cannot simply use actual_model_id
|
||||
// here, as we need to choose g_model_id based on the g_arch_id we are
|
||||
// using, which could be different to actual_arch_id.
|
||||
|
||||
model_id = bli_cpuid_query_model_id( arch_id );
|
||||
g_model_id = bli_cpuid_query_model_id( g_arch_id );
|
||||
}
|
||||
|
||||
//printf( "blis_arch_query_id(): arch_id, model_id = %u, %u\n", arch_id, model_id );
|
||||
//printf( "bli_arch_set_id(): g_arch_id, g_model_id = %u, %u\n", g_arch_id, g_model_id );
|
||||
//exit(1);
|
||||
}
|
||||
|
||||
void bli_arch_check_id( void )
|
||||
{
|
||||
bli_arch_set_id_once();
|
||||
#ifndef BLIS_CONFIGURETIME_CPUID
|
||||
bli_pthread_once( &once_id_init, bli_arch_set_id );
|
||||
#endif
|
||||
|
||||
bool arch_not_in_build = FALSE;
|
||||
bool arch_reset = FALSE;
|
||||
arch_t orig_arch_id= req_id;
|
||||
model_t orig_model_id = model_id;
|
||||
model_t orig_model_id = g_model_id;
|
||||
|
||||
// Check arch value against configured options. Only needed
|
||||
// if user has set it. This function will also do the
|
||||
@@ -424,7 +321,7 @@ void bli_arch_check_id( void )
|
||||
arch_not_in_build = TRUE;
|
||||
arch_reset = TRUE;
|
||||
req_id = actual_arch_id;
|
||||
model_id = actual_model_id;
|
||||
g_model_id = actual_model_id;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -450,7 +347,7 @@ void bli_arch_check_id( void )
|
||||
case BLIS_ARCH_HASWELL:
|
||||
arch_reset = TRUE;
|
||||
req_id = actual_arch_id;
|
||||
model_id = actual_model_id;
|
||||
g_model_id = actual_model_id;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -469,7 +366,7 @@ void bli_arch_check_id( void )
|
||||
case BLIS_ARCH_SKX:
|
||||
arch_reset = TRUE;
|
||||
req_id = actual_arch_id;
|
||||
model_id = actual_model_id;
|
||||
g_model_id = actual_model_id;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -498,12 +395,12 @@ void bli_arch_check_id( void )
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
// If BLIS_ARCH_TYPE (or renamed version of this environment variable)
|
||||
// was set, we always use this value of req_id to set arch_id.
|
||||
// was set, we always use this value of req_id to set g_arch_id.
|
||||
}
|
||||
|
||||
// Finally, we can be confident that req_id (1) is in range and (2)
|
||||
// refers to a context that has been initialized.
|
||||
arch_id = req_id;
|
||||
g_arch_id = req_id;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -514,18 +411,18 @@ void bli_arch_check_id( void )
|
||||
if ( req_id == -1 && aocl_e_i)
|
||||
{
|
||||
// AOCL_ENABLE_INSTRUCTIONS was set to an invalid value, so the
|
||||
// normal system arch_id was used instead.
|
||||
if ( model_id == BLIS_MODEL_DEFAULT )
|
||||
// normal system g_arch_id was used instead.
|
||||
if ( g_model_id == BLIS_MODEL_DEFAULT )
|
||||
{
|
||||
fprintf( stderr, "libblis: AOCL_ENABLE_INSTRUCTIONS env var was set to an invalid value.\n"
|
||||
"libblis: Selecting system default sub-configuration '%s'.\n",
|
||||
bli_arch_string( arch_id ) );
|
||||
bli_arch_string( g_arch_id ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf( stderr, "libblis: AOCL_ENABLE_INSTRUCTIONS env var was set to an invalid value.\n"
|
||||
"libblis: Selecting system default sub-configuration '%s', model '%s'.\n",
|
||||
bli_arch_string( arch_id ), bli_model_string( model_id ) );
|
||||
bli_arch_string( g_arch_id ), bli_model_string( g_model_id ) );
|
||||
}
|
||||
}
|
||||
else if ( arch_not_in_build )
|
||||
@@ -534,13 +431,13 @@ void bli_arch_check_id( void )
|
||||
{
|
||||
fprintf( stderr, "libblis: Sub-configuration '%s' is not implemented in this build.\n"
|
||||
"libblis: Selecting system default sub-configuration '%s'.\n",
|
||||
bli_arch_string( orig_arch_id ), bli_arch_string( arch_id ) );
|
||||
bli_arch_string( orig_arch_id ), bli_arch_string( g_arch_id ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf( stderr, "libblis: Sub-configuration '%s', model '%s' is not implemented in this build.\n"
|
||||
"libblis: Selecting system default sub-configuration '%s', model '%s'.\n",
|
||||
bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( arch_id ), bli_model_string( model_id ) );
|
||||
bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( g_arch_id ), bli_model_string( g_model_id ) );
|
||||
}
|
||||
}
|
||||
else if ( arch_reset )
|
||||
@@ -549,27 +446,27 @@ void bli_arch_check_id( void )
|
||||
{
|
||||
fprintf( stderr, "libblis: Sub-configuration '%s' is not supported on this system.\n"
|
||||
"libblis: Selecting system default sub-configuration '%s'.\n",
|
||||
bli_arch_string( orig_arch_id ), bli_arch_string( arch_id ) );
|
||||
bli_arch_string( orig_arch_id ), bli_arch_string( g_arch_id ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf( stderr, "libblis: Sub-configuration '%s', model '%s' is not supported on this system.\n"
|
||||
"libblis: Selecting system default sub-configuration '%s', model '%s'.\n",
|
||||
bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( arch_id ), bli_model_string( model_id ) );
|
||||
bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( g_arch_id ), bli_model_string( g_model_id ) );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( model_id == BLIS_MODEL_DEFAULT )
|
||||
if ( g_model_id == BLIS_MODEL_DEFAULT )
|
||||
{
|
||||
#ifdef DISABLE_BLIS_ARCH_TYPE
|
||||
fprintf( stderr, "libblis: Selecting sub-configuration '%s'.\n"
|
||||
"libblis: User control of sub-configuration using AOCL_ENABLE_INSTRUCTIONS\n"
|
||||
"libblis: or using "__blis_arch_type_name" and "__blis_model_type_name" is disabled.\n",
|
||||
bli_arch_string( arch_id ) );
|
||||
bli_arch_string( g_arch_id ) );
|
||||
#else
|
||||
fprintf( stderr, "libblis: Selecting sub-configuration '%s'.\n",
|
||||
bli_arch_string( arch_id ) );
|
||||
bli_arch_string( g_arch_id ) );
|
||||
#endif
|
||||
}
|
||||
else
|
||||
@@ -578,10 +475,10 @@ void bli_arch_check_id( void )
|
||||
fprintf( stderr, "libblis: Selecting sub-configuration '%s', model '%s'.\n"
|
||||
"libblis: User control of sub-configuration using AOCL_ENABLE_INSTRUCTIONS\n"
|
||||
"libblis: or using "__blis_arch_type_name" and "__blis_model_type_name" is disabled.\n",
|
||||
bli_arch_string( arch_id ), bli_model_string( model_id ) );
|
||||
bli_arch_string( g_arch_id ), bli_model_string( g_model_id ) );
|
||||
#else
|
||||
fprintf( stderr, "libblis: Selecting sub-configuration '%s', model '%s'.\n",
|
||||
bli_arch_string( arch_id ), bli_model_string( model_id ) );
|
||||
bli_arch_string( g_arch_id ), bli_model_string( g_model_id ) );
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@@ -601,7 +498,7 @@ void bli_arch_check_id( void )
|
||||
#endif
|
||||
}
|
||||
|
||||
//printf( "blis_arch_check_id(): arch_id, model_id = %u, %u\n", arch_id, model_id );
|
||||
//printf( "bli_arch_check_id(): g_arch_id, g_model_id = %u, %u\n", g_arch_id, g_model_id );
|
||||
//exit(1);
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -35,25 +36,89 @@
|
||||
#ifndef BLIS_ARCH_H
|
||||
#define BLIS_ARCH_H
|
||||
|
||||
BLIS_EXPORT_BLIS arch_t bli_arch_query_id( void );
|
||||
BLIS_EXPORT_BLIS bool bli_aocl_enable_instruction_query( void );
|
||||
|
||||
void bli_arch_set_id_once( void );
|
||||
void bli_arch_set_id( void );
|
||||
BLIS_EXPORT_BLIS arch_t bli_arch_query_id( void );
|
||||
|
||||
void bli_arch_check_id_once( void );
|
||||
void bli_arch_check_id( void );
|
||||
BLIS_EXPORT_BLIS model_t bli_model_query_id( void );
|
||||
BLIS_EXPORT_BLIS model_t bli_init_model_query_id( void );
|
||||
|
||||
BLIS_EXPORT_BLIS char* bli_arch_string( arch_t id );
|
||||
BLIS_EXPORT_BLIS char* bli_model_string( model_t id );
|
||||
|
||||
extern arch_t g_arch_id;
|
||||
extern model_t g_model_id;
|
||||
|
||||
extern bli_pthread_once_t once_id_check;
|
||||
extern bli_pthread_once_t once_id_init;
|
||||
|
||||
void bli_arch_set_id( void );
|
||||
|
||||
void bli_arch_check_id( void );
|
||||
|
||||
void bli_arch_set_logging( bool dolog );
|
||||
bool bli_arch_get_logging( void );
|
||||
void bli_arch_log( char*, ... );
|
||||
|
||||
BLIS_EXPORT_BLIS model_t bli_model_query_id( void );
|
||||
BLIS_EXPORT_BLIS model_t bli_init_model_query_id( void );
|
||||
BLIS_INLINE arch_t bli_arch_query_id_internal( void )
|
||||
{
|
||||
|
||||
BLIS_EXPORT_BLIS char* bli_model_string( model_t id );
|
||||
#if defined BLIS_FAMILY_INTEL64 || \
|
||||
defined BLIS_FAMILY_AMDZEN || \
|
||||
defined BLIS_FAMILY_AMD64_LEGACY || \
|
||||
defined BLIS_FAMILY_X86_64 || \
|
||||
defined BLIS_FAMILY_ARM64 || \
|
||||
defined BLIS_FAMILY_ARM32
|
||||
|
||||
// For builds with multiple sub-configurations use the global value
|
||||
// that will reflect dynamic dispatch, subject to any user override
|
||||
// via environment variables.
|
||||
#ifndef BLIS_CONFIGURETIME_CPUID
|
||||
bli_pthread_once( &once_id_check, bli_arch_check_id );
|
||||
#endif
|
||||
// Simply return the id that was previously cached.
|
||||
return g_arch_id;
|
||||
|
||||
#else
|
||||
|
||||
#if defined BLIS_FAMILY_TO_ARCH_VALUE
|
||||
// For single sub-configuration builds, get value from header file
|
||||
arch_t l_arch_id = BLIS_FAMILY_TO_ARCH_VALUE;
|
||||
#elif defined BLIS_CONFIGURETIME_CPUID
|
||||
// For "auto" build, initialize the returned arch id to
|
||||
// generic as starting point for use in architecture detection.
|
||||
// BLIS will then determine the correct architecture and get
|
||||
// the correct BLIS_FAMILY_TO_ARCH_VALUE from the relevant
|
||||
// sub-configuration header file.
|
||||
arch_t l_arch_id = BLIS_ARCH_GENERIC;
|
||||
#else
|
||||
// No fallback if BLIS_FAMILY_TO_ARCH_VALUE is not set in
|
||||
// the relevant config bli_family header file
|
||||
#error "BLIS_FAMILY_TO_ARCH_VALUE not defined in relevant config bli_family header file"
|
||||
#endif
|
||||
return l_arch_id;
|
||||
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
BLIS_INLINE model_t bli_model_query_id_internal( void )
|
||||
{
|
||||
#ifndef BLIS_CONFIGURETIME_CPUID
|
||||
bli_pthread_once( &once_id_check, bli_arch_check_id );
|
||||
#endif
|
||||
// Simply return the model_id that was previously cached.
|
||||
return g_model_id;
|
||||
}
|
||||
|
||||
BLIS_INLINE model_t bli_init_model_query_id_internal( void )
|
||||
{
|
||||
#ifndef BLIS_CONFIGURETIME_CPUID
|
||||
bli_pthread_once( &once_id_init, bli_arch_set_id );
|
||||
#endif
|
||||
// Simply return the model_id that was previously cached.
|
||||
return g_model_id;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -519,10 +519,10 @@ cntx_t* bli_gks_query_nat_cntx( void )
|
||||
|
||||
// Return the address of the native context for the architecture id
|
||||
// corresponding to the current hardware, as determined by
|
||||
// bli_arch_query_id().
|
||||
// bli_arch_query_id_internal().
|
||||
|
||||
// Query the architecture id.
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Use the architecture id to look up a pointer to its context.
|
||||
cntx_t* cntx = bli_gks_lookup_nat_cntx( arch_id );
|
||||
@@ -538,7 +538,7 @@ cntx_t* bli_gks_query_cntx_noinit( void )
|
||||
// does not call bli_init_once().
|
||||
|
||||
// Query the architecture id.
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Use the architecture id to look up a pointer to its context.
|
||||
cntx_t* cntx = bli_gks_lookup_nat_cntx( arch_id );
|
||||
@@ -566,7 +566,7 @@ cntx_t* bli_gks_query_ind_cntx
|
||||
// Return the address of a context that will be suited for executing a
|
||||
// level-3 operation via the requested induced method (and datatype) for
|
||||
// the architecture id corresponding to the current hardware, as
|
||||
// determined by bli_arch_query_id().
|
||||
// determined by bli_arch_query_id_internal().
|
||||
|
||||
// This function is called when a level-3 operation via induced method is
|
||||
// called, e.g. bli_gemm1m(). If this is the first time that induced method
|
||||
@@ -577,7 +577,7 @@ cntx_t* bli_gks_query_ind_cntx
|
||||
// ensure thread safety and deterministic behavior.
|
||||
|
||||
// Query the architecture id.
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Sanity check: verify that the arch_t id is valid.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
@@ -658,7 +658,7 @@ void bli_gks_init_ref_cntx
|
||||
)
|
||||
{
|
||||
// Query the architecture id.
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Sanity check: verify that the arch_t id is valid.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
@@ -687,7 +687,7 @@ bool bli_gks_cntx_l3_nat_ukr_is_ref
|
||||
cntx_t ref_cntx;
|
||||
|
||||
// Initialize a context with reference kernels for the arch_t id queried
|
||||
// via bli_arch_query_id().
|
||||
// via bli_arch_query_id_internal().
|
||||
bli_gks_init_ref_cntx( &ref_cntx );
|
||||
|
||||
// Query each context for the micro-kernel function pointer for the
|
||||
@@ -779,7 +779,7 @@ kimpl_t bli_gks_l3_ukr_impl_type( l3ukr_t ukr, ind_t method, num_t dt )
|
||||
cntx_t ref_cntx_l;
|
||||
|
||||
// Query the architecture id.
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Sanity check: verify that the arch_t id is valid.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2021 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2021 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -557,7 +557,7 @@ void bli_nthreads_optimum(
|
||||
dim_t k = bli_obj_width_after_trans(a);
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
if(arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
|
||||
{
|
||||
@@ -1209,7 +1209,7 @@ void bli_nthreads_optimum(
|
||||
dim_t k = bli_obj_width_after_trans(a);
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
if( arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4 )
|
||||
{
|
||||
@@ -1643,7 +1643,7 @@ void bli_nthreads_optimum(
|
||||
dim_t k = bli_obj_width_after_trans(a);
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
if( arch_id == BLIS_ARCH_ZEN5 )
|
||||
{
|
||||
@@ -2255,7 +2255,7 @@ void bli_nthreads_optimum(
|
||||
dim_t n = bli_obj_width(c);
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
if (arch_id == BLIS_ARCH_ZEN5)
|
||||
{
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -303,7 +303,7 @@ f77_int idamax_blis_impl
|
||||
cntx_t* cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -200,7 +200,7 @@ void saxpy_blis_impl
|
||||
cntx_t *cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
@@ -341,7 +341,7 @@ void daxpy_blis_impl
|
||||
cntx_t *cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
@@ -724,7 +724,7 @@ void zaxpy_blis_impl
|
||||
cntx_t *cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2022 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -168,7 +168,7 @@ void scopy_blis_impl
|
||||
cntx_t *cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
@@ -290,7 +290,7 @@ void dcopy_blis_impl
|
||||
cntx_t *cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
@@ -552,7 +552,7 @@ void zcopy_blis_impl
|
||||
cntx_t *cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -189,7 +189,7 @@ float sdot_blis_impl
|
||||
cntx_t *cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
@@ -334,7 +334,7 @@ double ddot_blis_impl
|
||||
cntx_t *cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
@@ -817,7 +817,7 @@ dcomplex zdotu_blis_impl
|
||||
cntx_t *cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
@@ -1247,7 +1247,7 @@ dcomplex zdotc_blis_impl
|
||||
cntx_t *cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -42,7 +42,7 @@
|
||||
|
||||
#define GEMM_BLIS_IMPL(ch, blasname) \
|
||||
PASTEF77S(ch,blasname) ( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \
|
||||
arch_t arch_id = bli_arch_query_id(); \
|
||||
arch_t arch_id = bli_arch_query_id_internal(); \
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
|
||||
{ \
|
||||
bli_zero_zmm(); \
|
||||
@@ -684,7 +684,7 @@ void dgemm_blis_impl
|
||||
err_t k1_status = BLIS_FAILURE;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
if ( arch_id == BLIS_ARCH_ZEN || arch_id == BLIS_ARCH_ZEN2 ||
|
||||
arch_id == BLIS_ARCH_ZEN3 )
|
||||
@@ -821,7 +821,7 @@ void dgemm_blis_impl
|
||||
bool entry_to_small = false;
|
||||
/* AVX512 GEMM tiny path is performant enough to handle small skinny inputs on ZEN4/5 */
|
||||
/* AVX2 gemm_small path is invoked on ZEN/2/3 only */
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
if( arch_id == BLIS_ARCH_ZEN3 || arch_id == BLIS_ARCH_ZEN2 || arch_id == BLIS_ARCH_ZEN )
|
||||
{
|
||||
@@ -929,7 +929,7 @@ void dgemm_
|
||||
{
|
||||
dgemm_blis_impl(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
|
||||
#if defined(BLIS_KERNELS_ZEN4)
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
|
||||
{
|
||||
bli_zero_zmm();
|
||||
@@ -1179,7 +1179,7 @@ void zgemm_blis_impl
|
||||
err_t k1_status = BLIS_FAILURE;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
if ( arch_id == BLIS_ARCH_ZEN || arch_id == BLIS_ARCH_ZEN2 ||
|
||||
arch_id == BLIS_ARCH_ZEN3 )
|
||||
@@ -1335,7 +1335,7 @@ void zgemm_blis_impl
|
||||
#endif
|
||||
|
||||
/* Query the architecture ID */
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
/* Boolean to track the entry to small path */
|
||||
bool entry_to_small = false;
|
||||
@@ -1491,7 +1491,7 @@ void zgemm_
|
||||
{
|
||||
zgemm_blis_impl(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
|
||||
#if defined(BLIS_KERNELS_ZEN4)
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
|
||||
{
|
||||
bli_zero_zmm();
|
||||
@@ -1741,7 +1741,7 @@ void cgemm_blis_impl
|
||||
if( ( k0 == 1 ) && bli_is_notrans( blis_transa ) && bli_is_notrans( blis_transb ) )
|
||||
{
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
if ( ( arch_id == BLIS_ARCH_ZEN4 ) || ( arch_id == BLIS_ARCH_ZEN5 ) )
|
||||
{
|
||||
@@ -1887,7 +1887,7 @@ void cgemm_
|
||||
{
|
||||
cgemm_blis_impl(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
|
||||
#if defined(BLIS_KERNELS_ZEN4)
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
|
||||
{
|
||||
bli_zero_zmm();
|
||||
@@ -2170,7 +2170,7 @@ void sgemm_
|
||||
{
|
||||
sgemm_blis_impl(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
|
||||
#if defined(BLIS_KERNELS_ZEN4)
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
|
||||
{
|
||||
bli_zero_zmm();
|
||||
@@ -2340,7 +2340,7 @@ void dzgemm_
|
||||
{
|
||||
dzgemm_blis_impl( transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc );
|
||||
#if defined(BLIS_KERNELS_ZEN4)
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
|
||||
{
|
||||
bli_zero_zmm();
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -44,7 +44,7 @@
|
||||
|
||||
#define HEMM_BLIS_IMPL(ch, blasname) \
|
||||
PASTEF77S(ch,blasname) ( side, uploa, m, n, alpha, a, lda, b, ldb, beta, c, ldc ); \
|
||||
arch_t arch_id = bli_arch_query_id(); \
|
||||
arch_t arch_id = bli_arch_query_id_internal(); \
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
|
||||
{ \
|
||||
bli_zero_zmm(); \
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -44,7 +44,7 @@
|
||||
|
||||
#define HER2K_BLIS_IMPL(ch, blasname) \
|
||||
PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, b, ldb, beta, c, ldc ); \
|
||||
arch_t arch_id = bli_arch_query_id(); \
|
||||
arch_t arch_id = bli_arch_query_id_internal(); \
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
|
||||
{ \
|
||||
bli_zero_zmm(); \
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -44,7 +44,7 @@
|
||||
|
||||
#define HERK_BLIS_IMPL(ch, blasname) \
|
||||
PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, beta, c, ldc ); \
|
||||
arch_t arch_id = bli_arch_query_id(); \
|
||||
arch_t arch_id = bli_arch_query_id_internal(); \
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
|
||||
{ \
|
||||
bli_zero_zmm(); \
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -157,7 +157,7 @@ void sscal_blis_impl
|
||||
cntx_t *cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
@@ -252,7 +252,7 @@ void dscal_blis_impl
|
||||
cntx_t *cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
@@ -455,7 +455,7 @@ void zdscal_blis_impl
|
||||
cntx_t *cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
@@ -626,7 +626,7 @@ void cscal_blis_impl
|
||||
cntx_t* cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
@@ -721,7 +721,7 @@ void zscal_blis_impl
|
||||
cntx_t* cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -44,7 +44,7 @@
|
||||
|
||||
#define SYMM_BLIS_IMPL(ch, blasname) \
|
||||
PASTEF77S(ch,blasname) ( side, uploa, m, n, alpha, a, lda, b, ldb, beta, c, ldc ); \
|
||||
arch_t arch_id = bli_arch_query_id(); \
|
||||
arch_t arch_id = bli_arch_query_id_internal(); \
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
|
||||
{ \
|
||||
bli_zero_zmm(); \
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin.
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -44,7 +44,7 @@
|
||||
|
||||
#define SYR2K_BLIS_IMPL(ch, blasname) \
|
||||
PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, b, ldb, beta, c, ldc ); \
|
||||
arch_t arch_id = bli_arch_query_id(); \
|
||||
arch_t arch_id = bli_arch_query_id_internal(); \
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
|
||||
{ \
|
||||
bli_zero_zmm(); \
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin.
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -44,7 +44,7 @@
|
||||
|
||||
#define SYRK_BLIS_IMPL(ch, blasname) \
|
||||
PASTEF77S(ch,blasname) ( uploc, transa, m, k, alpha, a, lda, beta, c, ldc ); \
|
||||
arch_t arch_id = bli_arch_query_id(); \
|
||||
arch_t arch_id = bli_arch_query_id_internal(); \
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
|
||||
{ \
|
||||
bli_zero_zmm(); \
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin.
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -43,7 +43,7 @@
|
||||
|
||||
#define TRMM_BLIS_IMPL(ch, blasname) \
|
||||
PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \
|
||||
arch_t arch_id = bli_arch_query_id(); \
|
||||
arch_t arch_id = bli_arch_query_id_internal(); \
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
|
||||
{ \
|
||||
bli_zero_zmm(); \
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -44,7 +44,7 @@
|
||||
|
||||
#define TRSM_BLIS_IMPL(ch, blasname) \
|
||||
PASTEF77S(ch,blasname) ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb ); \
|
||||
arch_t arch_id = bli_arch_query_id(); \
|
||||
arch_t arch_id = bli_arch_query_id_internal(); \
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
|
||||
{ \
|
||||
bli_zero_zmm(); \
|
||||
@@ -847,7 +847,7 @@ void strsm_
|
||||
{
|
||||
strsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb );
|
||||
#if defined(BLIS_KERNELS_ZEN4)
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
|
||||
{
|
||||
bli_zero_zmm();
|
||||
@@ -1139,7 +1139,7 @@ void dtrsm_blis_impl
|
||||
dtrsm_small_ker_ft trsm_ker_ptr = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// dimensions of triangular matrix
|
||||
// for left variants, dim_a is m0,
|
||||
@@ -1351,7 +1351,7 @@ void dtrsm_
|
||||
{
|
||||
dtrsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb );
|
||||
#if defined(BLIS_KERNELS_ZEN4)
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
|
||||
{
|
||||
bli_zero_zmm();
|
||||
@@ -1706,7 +1706,7 @@ void ztrsm_blis_impl
|
||||
ztrsm_small_ker_ft trsm_ker_ptr = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
bool is_parallel = bli_thread_get_is_parallel();
|
||||
|
||||
@@ -1895,7 +1895,7 @@ void ztrsm_
|
||||
{
|
||||
ztrsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb );
|
||||
#if defined(BLIS_KERNELS_ZEN4)
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
|
||||
{
|
||||
bli_zero_zmm();
|
||||
@@ -2306,7 +2306,7 @@ void ctrsm_
|
||||
{
|
||||
ctrsm_blis_impl ( side, uploa, transa, diaga, m, n, alpha, a, lda, b, ldb );
|
||||
#if defined(BLIS_KERNELS_ZEN4)
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4)
|
||||
{
|
||||
bli_zero_zmm();
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2024 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2024 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -176,7 +176,7 @@ void saxpby_blis_impl
|
||||
cntx_t *cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
@@ -308,7 +308,7 @@ void daxpby_blis_impl
|
||||
cntx_t *cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
@@ -445,7 +445,7 @@ void caxpby_blis_impl
|
||||
cntx_t *cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
@@ -577,7 +577,7 @@ void zaxpby_blis_impl
|
||||
cntx_t *cntx = NULL;
|
||||
|
||||
// Query the architecture ID
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
|
||||
// Function pointer declaration for the function
|
||||
// that will be used by this API
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2020 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -42,7 +42,7 @@
|
||||
|
||||
#define GEMMT_BLIS_IMPL(ch, blasname) \
|
||||
PASTEF77S(ch,blasname) ( uploc, transa, transb, n, k, alpha, a, lda, b, ldb, beta, c, ldc ); \
|
||||
arch_t arch_id = bli_arch_query_id(); \
|
||||
arch_t arch_id = bli_arch_query_id_internal(); \
|
||||
if (arch_id == BLIS_ARCH_ZEN5 || arch_id == BLIS_ARCH_ZEN4) \
|
||||
{ \
|
||||
bli_zero_zmm(); \
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -322,7 +322,7 @@ void bli_cnormfv_unb_var1
|
||||
inc_t incx_buf = incx;
|
||||
|
||||
// Querying the architecture ID to deploy the appropriate kernel
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
switch ( arch_id )
|
||||
{
|
||||
case BLIS_ARCH_ZEN5:
|
||||
@@ -460,7 +460,7 @@ void bli_znormfv_unb_var1
|
||||
dim_t simd_factor = 1;
|
||||
#endif
|
||||
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
switch ( arch_id )
|
||||
{
|
||||
case BLIS_ARCH_ZEN5:
|
||||
@@ -939,7 +939,7 @@ void bli_snormfv_unb_var1
|
||||
inc_t incx_buf = incx;
|
||||
|
||||
// Querying the architecture ID to deploy the appropriate kernel
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
switch ( arch_id )
|
||||
{
|
||||
case BLIS_ARCH_ZEN5:
|
||||
@@ -1084,7 +1084,7 @@ void bli_dnormfv_unb_var1
|
||||
dim_t nt_ideal = -1;
|
||||
#endif
|
||||
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
arch_t arch_id = bli_arch_query_id_internal();
|
||||
switch ( arch_id )
|
||||
{
|
||||
case BLIS_ARCH_ZEN5:
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2024 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2024 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -377,7 +377,7 @@ void bli_daxpyf_zen4_int_32_mt
|
||||
BLIS_AXPYF_KER,
|
||||
BLIS_DOUBLE,
|
||||
BLIS_DOUBLE,
|
||||
bli_arch_query_id(),
|
||||
bli_arch_query_id_internal(),
|
||||
m,
|
||||
&nt
|
||||
);
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2025-26, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2025 - 2026, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -567,7 +567,7 @@ GENT_GEMV_CALLER(double, d, 40, 2, n, st);
|
||||
BLIS_GEMV_KER, \
|
||||
PASTEMAC(ch,type), \
|
||||
BLIS_NO_TRANSPOSE, \
|
||||
bli_arch_query_id(), \
|
||||
bli_arch_query_id_internal(), \
|
||||
m, \
|
||||
n, \
|
||||
&nt \
|
||||
@@ -1034,7 +1034,7 @@ GENT_GEMV_CALLER(double, d, 40, 8, m);
|
||||
BLIS_GEMV_KER,
|
||||
BLIS_DOUBLE, //PASTEMAC(d,type),
|
||||
BLIS_NO_TRANSPOSE,
|
||||
bli_arch_query_id(),
|
||||
bli_arch_query_id_internal(),
|
||||
m,
|
||||
n,
|
||||
&nt
|
||||
@@ -1190,7 +1190,7 @@ void bli_dgemv_m_zen4_int_40x8_mt_Mdiv
|
||||
BLIS_GEMV_KER,
|
||||
PASTEMAC(d,type),
|
||||
BLIS_NO_TRANSPOSE,
|
||||
bli_arch_query_id(),
|
||||
bli_arch_query_id_internal(),
|
||||
m,
|
||||
n,
|
||||
&nt
|
||||
@@ -1313,7 +1313,7 @@ void bli_dgemv_m_zen4_int_40x8_mt_Ndiv
|
||||
BLIS_GEMV_KER,
|
||||
PASTEMAC(d,type),
|
||||
BLIS_NO_TRANSPOSE,
|
||||
bli_arch_query_id(),
|
||||
bli_arch_query_id_internal(),
|
||||
m,
|
||||
n,
|
||||
&nt
|
||||
|
||||
Reference in New Issue
Block a user