mirror of
https://github.com/amd/blis.git
synced 2026-04-20 15:48:50 +00:00
Instead of editing a header file, add options to build systems to allow DTL tracing and/or logging output to be generated. For most users logging is recommended, producing a line of output per application thread of every BLAS call made. Tracing provides more detailed info of internal BLIS calls, and is aimed more at expert users and BLIS developers. Different tracing levels from 1 to 10 provide control of the granularity of information produced. The default level is 5. Note that tracing, especially at higher tracing levels, will impose a significant runtime cost overhead. Example usage: Using configure: ./configure ... --enable-aocl-dtl=log amdzen ./configure ... --enable-aocl-dtl=trace --aocl-dtl-trace-level=6 amdzen ./configure ... --enable-aocl-dtl=all amdzen Using CMake: cmake ... -DENABLE_AOCL_DTL=LOG cmake ... -DENABLE_AOCL_DTL=TRACE -DAOCL_DTL_TRACE_LEVEL=6 cmake ... -DENABLE_AOCL_DTL=ALL Also, modify function AOCL_get_requested_threads_count to correct reported thread count in cases where internal value is recorded as -1 AMD-Internal: [CPUPL-7010]
248 lines
5.8 KiB
C
248 lines
5.8 KiB
C
/*
|
|
|
|
BLIS
|
|
An object-based framework for developing high-performance BLAS-like
|
|
libraries.
|
|
|
|
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are
|
|
met:
|
|
- Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
- Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
- Neither the name(s) of the copyright holder(s) nor the names of its
|
|
contributors may be used to endorse or promote products derived
|
|
from this software without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
// Include guard for the configuration header produced from this template.
// It is closed by the final #endif at the bottom of the file.
#ifndef BLIS_CONFIG_H
|
|
#define BLIS_CONFIG_H
|
|
|
|
// Each of the three ${...} placeholders below sits alone on its line, so
// whatever replaces it must be complete preprocessor text (presumably one or
// more #define lines, or nothing — TODO confirm against the build scripts).
// Enabled configuration "family" (config_name)
|
|
${CONFIG_NAME_DEFINE}
|
|
|
|
// Enabled sub-configurations (config_list)
|
|
${CONFIG_LIST_DEFINES}
|
|
|
|
// Enabled kernel sets (kernel_list)
|
|
${KERNEL_LIST_DEFINES}
|
|
|
|
// AOCL_BLIS_ZEN is enabled only for ZEN family configurations.
|
|
// It lets TRSM use its own cache-blocking sizes instead of the common level-3 cache-block sizes.
|
|
#if ${ENABLE_AOCL_ZEN_01}
|
|
#define AOCL_BLIS_ZEN
|
|
#endif
|
|
|
|
// Defines AOCL_DYNAMIC when the ${ENABLE_AOCL_DYNAMIC_01} placeholder is
// substituted with a non-zero value. There is no #else branch, so nothing
// is defined in the disabled case.
#if ${ENABLE_AOCL_DYNAMIC_01}
|
|
#define AOCL_DYNAMIC
|
|
#endif
|
|
|
|
// Exactly one of BLIS_ENABLE_SYSTEM / BLIS_DISABLE_SYSTEM is defined,
// selected by the value substituted for ${ENABLE_SYSTEM_01}.
#if ${ENABLE_SYSTEM_01}
|
|
#define BLIS_ENABLE_SYSTEM
|
|
#else
|
|
#define BLIS_DISABLE_SYSTEM
|
|
#endif
|
|
|
|
// OpenMP and pthreads switches. The two placeholders are tested
// independently, so this header does not itself prevent both macros from
// being defined at once; neither has a BLIS_DISABLE_* counterpart here.
#if ${ENABLE_OPENMP_01}
|
|
#define BLIS_ENABLE_OPENMP
|
|
#endif
|
|
|
|
#if ${ENABLE_PTHREADS_01}
|
|
#define BLIS_ENABLE_PTHREADS
|
|
#endif
|
|
|
|
// BLIS_ENABLE_JRIR_SLAB and BLIS_ENABLE_JRIR_RR are controlled by separate
// placeholders. Nothing in this header enforces that exactly one of them is
// set; NOTE(review): presumably the build system guarantees that — confirm.
#if ${ENABLE_JRIR_SLAB_01}
|
|
#define BLIS_ENABLE_JRIR_SLAB
|
|
#endif
|
|
|
|
#if ${ENABLE_JRIR_RR_01}
|
|
#define BLIS_ENABLE_JRIR_RR
|
|
#endif
|
|
|
|
// Pool switches for the "PBA" and "SBA" allocators. For each one, exactly
// one of the BLIS_ENABLE_* / BLIS_DISABLE_* pair is defined, depending on
// the value substituted for the corresponding ${..._01} placeholder.
#if ${ENABLE_PBA_POOLS_01}
|
|
#define BLIS_ENABLE_PBA_POOLS
|
|
#else
|
|
#define BLIS_DISABLE_PBA_POOLS
|
|
#endif
|
|
|
|
#if ${ENABLE_SBA_POOLS_01}
|
|
#define BLIS_ENABLE_SBA_POOLS
|
|
#else
|
|
#define BLIS_DISABLE_SBA_POOLS
|
|
#endif
|
|
|
|
// Exactly one of BLIS_ENABLE_MEM_TRACING / BLIS_DISABLE_MEM_TRACING is
// defined, selected by the value substituted for ${ENABLE_MEM_TRACING_01}.
#if ${ENABLE_MEM_TRACING_01}
|
|
#define BLIS_ENABLE_MEM_TRACING
|
|
#else
|
|
#define BLIS_DISABLE_MEM_TRACING
|
|
#endif
|
|
|
|
// Integer-size selection. Unlike the 0/1 switches elsewhere in this file,
// these placeholders are compared against the literal values 64 and 32.
// For any other substituted value the macro is deliberately left undefined
// so that the size can be determined automatically elsewhere.
// The same scheme is applied separately to BLIS_INT_TYPE_SIZE and
// BLIS_BLAS_INT_TYPE_SIZE, each driven by its own placeholder.
#if ${INT_TYPE_SIZE} == 64
|
|
#define BLIS_INT_TYPE_SIZE 64
|
|
#elif ${INT_TYPE_SIZE} == 32
|
|
#define BLIS_INT_TYPE_SIZE 32
|
|
#else
|
|
// determine automatically
|
|
#endif
|
|
|
|
#if ${BLAS_INT_TYPE_SIZE} == 64
|
|
#define BLIS_BLAS_INT_TYPE_SIZE 64
|
|
#elif ${BLAS_INT_TYPE_SIZE} == 32
|
|
#define BLIS_BLAS_INT_TYPE_SIZE 32
|
|
#else
|
|
// determine automatically
|
|
#endif
|
|
|
|
// BLAS and CBLAS compatibility-layer switches.
// Each is wrapped in two #ifndef guards: if either the ENABLE or the DISABLE
// macro is already defined before this point (for example by an earlier
// definition or a compiler flag), the configured ${..._01} value is ignored
// and the pre-existing choice is left untouched. Otherwise exactly one of
// the ENABLE / DISABLE pair is defined from the placeholder.
#ifndef BLIS_ENABLE_BLAS
|
|
#ifndef BLIS_DISABLE_BLAS
|
|
#if ${ENABLE_BLAS_01}
|
|
#define BLIS_ENABLE_BLAS
|
|
#else
|
|
#define BLIS_DISABLE_BLAS
|
|
#endif
|
|
#endif
|
|
#endif
|
|
|
|
#ifndef BLIS_ENABLE_CBLAS
|
|
#ifndef BLIS_DISABLE_CBLAS
|
|
#if ${ENABLE_CBLAS_01}
|
|
#define BLIS_ENABLE_CBLAS
|
|
#else
|
|
#define BLIS_DISABLE_CBLAS
|
|
#endif
|
|
#endif
|
|
#endif
|
|
|
|
// If the CBLAS compatibility layer is enabled while the BLAS layer is
|
|
// not, the BLAS layer is force-enabled here. BLIS_DISABLE_BLAS is also
|
|
// undefined so that the two BLAS macros can never contradict each other.
|
|
#ifdef BLIS_ENABLE_CBLAS
|
|
#ifndef BLIS_ENABLE_BLAS
|
|
#define BLIS_ENABLE_BLAS
|
|
#endif // !BLIS_ENABLE_BLAS
|
|
// Unconditional: #undef of a macro that is not defined is a no-op.
#undef BLIS_DISABLE_BLAS
|
|
#endif // BLIS_ENABLE_CBLAS
|
|
|
|
// Mixed-datatype switches (BLIS_*_MIXED_DT and BLIS_*_MIXED_DT_EXTRA_MEM).
// Each is wrapped in two #ifndef guards: when either the ENABLE or the
// DISABLE macro is already defined, the placeholder value is not consulted.
// Otherwise exactly one of the pair is defined from the ${..._01} value.
// Note that the two settings are evaluated independently of each other;
// this header does not tie EXTRA_MEM to MIXED_DT being enabled.
#ifndef BLIS_ENABLE_MIXED_DT
|
|
#ifndef BLIS_DISABLE_MIXED_DT
|
|
#if ${ENABLE_MIXED_DT_01}
|
|
#define BLIS_ENABLE_MIXED_DT
|
|
#else
|
|
#define BLIS_DISABLE_MIXED_DT
|
|
#endif
|
|
#endif
|
|
#endif
|
|
|
|
#ifndef BLIS_ENABLE_MIXED_DT_EXTRA_MEM
|
|
#ifndef BLIS_DISABLE_MIXED_DT_EXTRA_MEM
|
|
#if ${ENABLE_MIXED_DT_EXTRA_MEM_01}
|
|
#define BLIS_ENABLE_MIXED_DT_EXTRA_MEM
|
|
#else
|
|
#define BLIS_DISABLE_MIXED_DT_EXTRA_MEM
|
|
#endif
|
|
#endif
|
|
#endif
|
|
|
|
// Five independent ENABLE/DISABLE switches: MNK1_MATRIX, TINY_MATRIX,
// SMALL_MATRIX, SUP_HANDLING and SMALL_MATRIX_TRSM. For each one, exactly
// one of the BLIS_ENABLE_* / BLIS_DISABLE_* pair is defined, chosen by its
// own ${..._01} placeholder. Unlike the BLAS/CBLAS switches, these have no
// #ifndef guards, so an earlier definition of the same macro is not
// taken into account here.
#if ${ENABLE_MNK1_MATRIX_01}
|
|
#define BLIS_ENABLE_MNK1_MATRIX
|
|
#else
|
|
#define BLIS_DISABLE_MNK1_MATRIX
|
|
#endif
|
|
|
|
#if ${ENABLE_TINY_MATRIX_01}
|
|
#define BLIS_ENABLE_TINY_MATRIX
|
|
#else
|
|
#define BLIS_DISABLE_TINY_MATRIX
|
|
#endif
|
|
|
|
#if ${ENABLE_SMALL_MATRIX_01}
|
|
#define BLIS_ENABLE_SMALL_MATRIX
|
|
#else
|
|
#define BLIS_DISABLE_SMALL_MATRIX
|
|
#endif
|
|
|
|
#if ${ENABLE_SUP_HANDLING_01}
|
|
#define BLIS_ENABLE_SUP_HANDLING
|
|
#else
|
|
#define BLIS_DISABLE_SUP_HANDLING
|
|
#endif
|
|
|
|
#if ${ENABLE_SMALL_MATRIX_TRSM_01}
|
|
#define BLIS_ENABLE_SMALL_MATRIX_TRSM
|
|
#else
|
|
#define BLIS_DISABLE_SMALL_MATRIX_TRSM
|
|
#endif
|
|
|
|
// Exactly one of BLIS_ENABLE_MEMKIND / BLIS_DISABLE_MEMKIND is defined,
// selected by the value substituted for ${ENABLE_MEMKIND_01}.
#if ${ENABLE_MEMKIND_01}
|
|
#define BLIS_ENABLE_MEMKIND
|
|
#else
|
|
#define BLIS_DISABLE_MEMKIND
|
|
#endif
|
|
|
|
// Exactly one of BLIS_ENABLE_TRSM_PREINVERSION /
// BLIS_DISABLE_TRSM_PREINVERSION is defined, selected by the value
// substituted for ${ENABLE_TRSM_PREINVERSION_01}.
#if ${ENABLE_TRSM_PREINVERSION_01}
|
|
#define BLIS_ENABLE_TRSM_PREINVERSION
|
|
#else
|
|
#define BLIS_DISABLE_TRSM_PREINVERSION
|
|
#endif
|
|
|
|
// Exactly one of BLIS_ENABLE_PRAGMA_OMP_SIMD / BLIS_DISABLE_PRAGMA_OMP_SIMD
// is defined, selected by the value substituted for
// ${ENABLE_PRAGMA_OMP_SIMD_01}. This switch is separate from
// BLIS_ENABLE_OPENMP; the two are not linked in this header.
#if ${ENABLE_PRAGMA_OMP_SIMD_01}
|
|
#define BLIS_ENABLE_PRAGMA_OMP_SIMD
|
|
#else
|
|
#define BLIS_DISABLE_PRAGMA_OMP_SIMD
|
|
#endif
|
|
|
|
// Exactly one of BLIS_ENABLE_SANDBOX / BLIS_DISABLE_SANDBOX is defined,
// selected by the value substituted for ${ENABLE_SANDBOX_01}.
#if ${ENABLE_SANDBOX_01}
|
|
#define BLIS_ENABLE_SANDBOX
|
|
#else
|
|
#define BLIS_DISABLE_SANDBOX
|
|
#endif
|
|
|
|
// Exactly one of BLIS_ENABLE_SHARED / BLIS_DISABLE_SHARED is defined,
// selected by the value substituted for ${ENABLE_SHARED_01}.
// NOTE(review): presumably this reflects a shared-library build — confirm.
#if ${ENABLE_SHARED_01}
|
|
#define BLIS_ENABLE_SHARED
|
|
#else
|
|
#define BLIS_DISABLE_SHARED
|
|
#endif
|
|
|
|
// Exactly one of BLIS_ENABLE_COMPLEX_RETURN_INTEL /
// BLIS_DISABLE_COMPLEX_RETURN_INTEL is defined, selected by the value
// substituted for ${COMPLEX_RETURN_INTEL_01}. Note that this placeholder
// does not carry the ENABLE_ prefix used by most other switches in the file.
#if ${COMPLEX_RETURN_INTEL_01}
|
|
#define BLIS_ENABLE_COMPLEX_RETURN_INTEL
|
|
#else
|
|
#define BLIS_DISABLE_COMPLEX_RETURN_INTEL
|
|
#endif
|
|
|
|
// A single placeholder controls two macros: when
// ${DISABLE_BLIS_ARCH_TYPE_01} is non-zero, DISABLE_BLIS_ARCH_TYPE and
// DISABLE_BLIS_MODEL_TYPE are defined together. There is no separate
// placeholder for the model-type macro, and nothing is defined otherwise.
#if ${DISABLE_BLIS_ARCH_TYPE_01}
|
|
#define DISABLE_BLIS_ARCH_TYPE
|
|
#define DISABLE_BLIS_MODEL_TYPE
|
|
#endif
|
|
|
|
// String-literal macros; the ${RENAME_...} placeholders are substituted
// inside the quotes. Both are defined unconditionally, i.e. even when
// DISABLE_BLIS_ARCH_TYPE is set.
// NOTE(review): presumably these hold the names of the settings used to
// select the arch/model type at run time — confirm against the users of
// these macros.
#define __blis_arch_type_name "${RENAME_BLIS_ARCH_TYPE}"
|
|
#define __blis_model_type_name "${RENAME_BLIS_MODEL_TYPE}"
|
|
|
|
// DTL tracing and logging switches, set from the build system rather than
// by hand-editing a header. Tracing and logging are controlled by separate
// placeholders, so either one or both may be enabled. When enabled, the
// macro is defined with the value 1; when disabled, it is left undefined.
#if ${ENABLE_DTL_TRACE_01}
|
|
#define AOCL_DTL_TRACE_ENABLE 1
|
|
#endif
|
|
|
|
#if ${ENABLE_DTL_LOG_01}
|
|
#define AOCL_DTL_LOG_ENABLE 1
|
|
#endif
|
|
|
|
// The placeholder is substituted textually, producing an identifier of the
// form AOCL_DTL_LEVEL_TRACE_<level>. That identifier is not defined in this
// header, so it is presumably provided by the DTL headers (TODO confirm).
// This macro is defined unconditionally, whether or not tracing is enabled.
// NOTE(review): trace levels reportedly range from 1 to 10 with a default
// of 5 — confirm against the build-system option handling.
#define AOCL_DTL_TRACE_LEVEL_NUMBER AOCL_DTL_LEVEL_TRACE_${AOCL_DTL_TRACE_LEVEL}
|
|
|
|
#endif // BLIS_CONFIG_H
|