mirror of
https://github.com/amd/blis.git
synced 2026-04-20 07:38:53 +00:00
Finer control of code path options (#67)
Add macros to allow specific code options to be enabled or disabled, controlled by options to configure and cmake. This expands on the existing GEMM and/or TRSM functionality to enable/disable SUP handling and replaces the hard coded #define in include files to enable small matrix paths. All options are enabled by default for all BLIS sub-configs but many of them are currently only implemented in AMD specific framework code variants. AMD-Internal: [CPUPL-6906] --------- Co-authored-by: Varaganti, Kiran <Kiran.Varaganti@amd.com>
This commit is contained in:
@@ -277,7 +277,12 @@ option(ENABLE_BLAS "BLAS compatiblity layer" ON)
|
||||
option(ENABLE_CBLAS "CBLAS compatiblity layer" OFF)
|
||||
option(ENABLE_MIXED_DT "Mixed datatype support" ON)
|
||||
option(ENABLE_MIXED_DT_EXTRA_MEM "Mixed datatype optimization requiring extra memory" ON)
|
||||
option(ENABLE_SUP_HANDLING "Small matrix handling" ON)
|
||||
option(ENABLE_MNK1_MATRIX "M, N or K = 1 matrix handling" ON)
|
||||
option(ENABLE_TINY_MATRIX "Tiny matrix handling" ON)
|
||||
option(ENABLE_SMALL_MATRIX "Small matrix handling" ON)
|
||||
option(ENABLE_SUP_HANDLING "SUP matrix handling" ON)
|
||||
option(ENABLE_SMALL_MATRIX_TRSM "TRSM Small matrix handling" ON)
|
||||
option(ENABLE_TRSM_PREINVERSION "Enable TRSM preinversion" ON)
|
||||
if(WIN32)
|
||||
set(ENABLE_MEMKIND "no" CACHE STRING "libmemkind for manage memory pools")
|
||||
set_property(CACHE ENABLE_MEMKIND PROPERTY STRINGS "no")
|
||||
@@ -292,7 +297,6 @@ else()
|
||||
during CMake invokation: auto, yes, no")
|
||||
endif()
|
||||
endif()
|
||||
option(ENABLE_TRSM_PREINVERSION "Enable TRSM preinversion" ON)
|
||||
option(ENABLE_AOCL_DYNAMIC "Dynamic selection of number of threads" ON)
|
||||
set(FORCE_VERSION "no" CACHE STRING "Force configure to use an arbitrary version string")
|
||||
if(WIN32)
|
||||
@@ -608,14 +612,46 @@ else()
|
||||
set(ENABLE_MIXED_DT_EXTRA_MEM_01 0)
|
||||
set(ENABLE_MIXED_DT_01 0)
|
||||
endif()
|
||||
cmake_print_variables(ENABLE_SUP_HANDLING)
|
||||
if(ENABLE_SUP_HANDLING)
|
||||
cmake_print_variables(ENABLE_MNK1_MATRIX)
|
||||
if(ENABLE_MNK1_MATRIX)
|
||||
message(" M, N or K = 1 matrix handling is enabled.")
|
||||
set(ENABLE_MNK1_MATRIX_01 1)
|
||||
else()
|
||||
message(" M, N or K = 1 matrix handling is disabled.")
|
||||
set(ENABLE_MNK1_MATRIX_01 0)
|
||||
endif()
|
||||
cmake_print_variables(ENABLE_TINY_MATRIX)
|
||||
if(ENABLE_TINY_MATRIX)
|
||||
message(" Tiny matrix handling is enabled.")
|
||||
set(ENABLE_TINY_MATRIX_01 1)
|
||||
else()
|
||||
message(" Tiny matrix handling is disabled.")
|
||||
set(ENABLE_TINY_MATRIX_01 0)
|
||||
endif()
|
||||
cmake_print_variables(ENABLE_SMALL_MATRIX)
|
||||
if(ENABLE_SMALL_MATRIX)
|
||||
message(" Small matrix handling is enabled.")
|
||||
set(ENABLE_SUP_HANDLING_01 1)
|
||||
set(ENABLE_SMALL_MATRIX_01 1)
|
||||
else()
|
||||
message(" Small matrix handling is disabled.")
|
||||
set(ENABLE_SMALL_MATRIX_01 0)
|
||||
endif()
|
||||
cmake_print_variables(ENABLE_SUP_HANDLING)
|
||||
if(ENABLE_SUP_HANDLING)
|
||||
message(" SUP matrix handling is enabled.")
|
||||
set(ENABLE_SUP_HANDLING_01 1)
|
||||
else()
|
||||
message(" SUP matrix handling is disabled.")
|
||||
set(ENABLE_SUP_HANDLING_01 0)
|
||||
endif()
|
||||
# Report the TRSM small-matrix option and derive ENABLE_SMALL_MATRIX_TRSM_01,
# the 0/1 value consumed by the `#if ${ENABLE_SMALL_MATRIX_TRSM_01}` guard in
# the generated configuration header.
cmake_print_variables(ENABLE_SMALL_MATRIX_TRSM)
# Test the TRSM-specific option here (not ENABLE_SMALL_MATRIX) so that
# -DENABLE_SMALL_MATRIX_TRSM=OFF actually disables the TRSM small path and the
# two small-matrix options can be toggled independently.
if(ENABLE_SMALL_MATRIX_TRSM)
    message(" TRSM Small matrix handling is enabled.")
    set(ENABLE_SMALL_MATRIX_TRSM_01 1)
else()
    message(" TRSM Small matrix handling is disabled.")
    set(ENABLE_SMALL_MATRIX_TRSM_01 0)
endif()
|
||||
cmake_print_variables(ENABLE_TRSM_PREINVERSION)
|
||||
if(ENABLE_TRSM_PREINVERSION)
|
||||
message(" trsm diagonal element pre-inversion is enabled.")
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2018 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -162,12 +162,36 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if @enable_mnk1_matrix@
|
||||
#define BLIS_ENABLE_MNK1_MATRIX
|
||||
#else
|
||||
#define BLIS_DISABLE_MNK1_MATRIX
|
||||
#endif
|
||||
|
||||
#if @enable_tiny_matrix@
|
||||
#define BLIS_ENABLE_TINY_MATRIX
|
||||
#else
|
||||
#define BLIS_DISABLE_TINY_MATRIX
|
||||
#endif
|
||||
|
||||
#if @enable_small_matrix@
|
||||
#define BLIS_ENABLE_SMALL_MATRIX
|
||||
#else
|
||||
#define BLIS_DISABLE_SMALL_MATRIX
|
||||
#endif
|
||||
|
||||
#if @enable_sup_handling@
|
||||
#define BLIS_ENABLE_SUP_HANDLING
|
||||
#else
|
||||
#define BLIS_DISABLE_SUP_HANDLING
|
||||
#endif
|
||||
|
||||
#if @enable_small_matrix_trsm@
|
||||
#define BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
#else
|
||||
#define BLIS_DISABLE_SMALL_MATRIX_TRSM
|
||||
#endif
|
||||
|
||||
#if @enable_memkind@
|
||||
#define BLIS_ENABLE_MEMKIND
|
||||
#else
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2020 - 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2020 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -160,12 +160,36 @@ ${KERNEL_LIST_DEFINES}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if ${ENABLE_MNK1_MATRIX_01}
|
||||
#define BLIS_ENABLE_MNK1_MATRIX
|
||||
#else
|
||||
#define BLIS_DISABLE_MNK1_MATRIX
|
||||
#endif
|
||||
|
||||
#if ${ENABLE_TINY_MATRIX_01}
|
||||
#define BLIS_ENABLE_TINY_MATRIX
|
||||
#else
|
||||
#define BLIS_DISABLE_TINY_MATRIX
|
||||
#endif
|
||||
|
||||
#if ${ENABLE_SMALL_MATRIX_01}
|
||||
#define BLIS_ENABLE_SMALL_MATRIX
|
||||
#else
|
||||
#define BLIS_DISABLE_SMALL_MATRIX
|
||||
#endif
|
||||
|
||||
#if ${ENABLE_SUP_HANDLING_01}
|
||||
#define BLIS_ENABLE_SUP_HANDLING
|
||||
#else
|
||||
#define BLIS_DISABLE_SUP_HANDLING
|
||||
#endif
|
||||
|
||||
#if ${ENABLE_SMALL_MATRIX_TRSM_01}
|
||||
#define BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
#else
|
||||
#define BLIS_DISABLE_SMALL_MATRIX_TRSM
|
||||
#endif
|
||||
|
||||
#if ${ENABLE_MEMKIND_01}
|
||||
#define BLIS_ENABLE_MEMKIND
|
||||
#else
|
||||
|
||||
@@ -45,9 +45,6 @@
|
||||
#define BLIS_THREAD_MAX_IR 1
|
||||
#define BLIS_THREAD_MAX_JR 1
|
||||
|
||||
#define BLIS_ENABLE_SMALL_MATRIX
|
||||
#define BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
|
||||
// This will select the threshold below which small matrix code will be called.
|
||||
#define BLIS_SMALL_MATRIX_THRES 700
|
||||
#define BLIS_SMALL_M_RECT_MATRIX_THRES 160
|
||||
|
||||
@@ -43,9 +43,6 @@
|
||||
#define BLIS_THREAD_MAX_IR 1
|
||||
#define BLIS_THREAD_MAX_JR 1
|
||||
|
||||
#define BLIS_ENABLE_SMALL_MATRIX
|
||||
#define BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
|
||||
// This will select the threshold below which small matrix code will be called.
|
||||
#define BLIS_SMALL_MATRIX_THRES 700
|
||||
#define BLIS_SMALL_M_RECT_MATRIX_THRES 160
|
||||
|
||||
@@ -44,9 +44,6 @@
|
||||
#define BLIS_THREAD_MAX_IR 1
|
||||
#define BLIS_THREAD_MAX_JR 1
|
||||
|
||||
#define BLIS_ENABLE_SMALL_MATRIX
|
||||
#define BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
|
||||
// This will select the threshold below which small matrix code will be called.
|
||||
#define BLIS_SMALL_MATRIX_THRES 700
|
||||
#define BLIS_SMALL_M_RECT_MATRIX_THRES 160
|
||||
|
||||
@@ -42,9 +42,6 @@
|
||||
#define BLIS_THREAD_MAX_IR 1
|
||||
#define BLIS_THREAD_MAX_JR 1
|
||||
|
||||
#define BLIS_ENABLE_SMALL_MATRIX
|
||||
#define BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
|
||||
// This will select the threshold below which small matrix code will be called.
|
||||
#define BLIS_SMALL_MATRIX_THRES 700
|
||||
#define BLIS_SMALL_M_RECT_MATRIX_THRES 160
|
||||
|
||||
@@ -42,9 +42,6 @@
|
||||
#define BLIS_THREAD_MAX_IR 1
|
||||
#define BLIS_THREAD_MAX_JR 1
|
||||
|
||||
#define BLIS_ENABLE_SMALL_MATRIX
|
||||
#define BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
|
||||
// This will select the threshold below which small matrix code will be called.
|
||||
#define BLIS_SMALL_MATRIX_THRES 700
|
||||
#define BLIS_SMALL_M_RECT_MATRIX_THRES 160
|
||||
|
||||
@@ -43,9 +43,6 @@
|
||||
#define BLIS_THREAD_MAX_IR 1
|
||||
#define BLIS_THREAD_MAX_JR 1
|
||||
|
||||
#define BLIS_ENABLE_SMALL_MATRIX
|
||||
#define BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
|
||||
// This will select the threshold below which small matrix code will be called.
|
||||
#define BLIS_SMALL_MATRIX_THRES 700
|
||||
#define BLIS_SMALL_M_RECT_MATRIX_THRES 160
|
||||
|
||||
@@ -43,9 +43,6 @@
|
||||
#define BLIS_THREAD_MAX_IR 1
|
||||
#define BLIS_THREAD_MAX_JR 1
|
||||
|
||||
#define BLIS_ENABLE_SMALL_MATRIX
|
||||
#define BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
|
||||
// This will select the threshold below which small matrix code will be called.
|
||||
#define BLIS_SMALL_MATRIX_THRES 700
|
||||
#define BLIS_SMALL_M_RECT_MATRIX_THRES 160
|
||||
|
||||
117
configure
vendored
117
configure
vendored
@@ -260,15 +260,58 @@ print_usage()
|
||||
echo " only be enabled when mixed domain/precision support is"
|
||||
echo " enabled."
|
||||
echo " "
|
||||
echo " --disable-sup-handling, --enable-sup-handling"
|
||||
echo " --disable-mnk1-matrix, --enable-mnk1-matrix"
|
||||
echo " "
|
||||
echo " Disable (enabled by default) handling of matrix problem"
|
||||
echo " where M, N or K = 1 via separate code branches. When disabled,"
|
||||
echo " these operations will be performed by gemm rather than gemv"
|
||||
echo " or other optimized implementations."
|
||||
echo " "
|
||||
echo " --disable-tiny-matrix, --enable-tiny-matrix"
|
||||
echo " "
|
||||
echo " Disable (enabled by default) handling of tiny"
|
||||
echo " matrix problems via tiny code branches. When disabled,"
|
||||
echo " these tiny level-3 operations will be performed by"
|
||||
echo " the conventional implementation, which is optimized for"
|
||||
echo " medium and large problems. Note that what qualifies as"
|
||||
echo " \"tiny\" depends on thresholds that may vary by sub-"
|
||||
echo " configuration."
|
||||
echo " Currently only of relevance on configs that include"
|
||||
echo " AMD Zen sub-configs"
|
||||
echo " "
|
||||
echo " --disable-small-matrix, --enable-small-matrix"
|
||||
echo " "
|
||||
echo " Disable (enabled by default) handling of small/skinny"
|
||||
echo " matrix problems via separate code branches. When disabled,"
|
||||
echo " matrix problems via small code branches. When disabled,"
|
||||
echo " these small/skinny level-3 operations will be performed by"
|
||||
echo " the conventional implementation, which is optimized for"
|
||||
echo " medium and large problems. Note that what qualifies as"
|
||||
echo " \"small\" depends on thresholds that may vary by sub-"
|
||||
echo " configuration."
|
||||
echo " Currently only of relevance on configs that include"
|
||||
echo " AMD Zen sub-configs"
|
||||
echo " "
|
||||
echo " --disable-sup-handling, --enable-sup-handling"
|
||||
echo " "
|
||||
echo " Disable (enabled by default) handling of small/skinny"
|
||||
echo " matrix problems via SUP code branches. When disabled,"
|
||||
echo " these small/skinny level-3 operations will be performed by"
|
||||
echo " the conventional implementation, which is optimized for"
|
||||
echo " medium and large problems. Note that what qualifies as"
|
||||
echo " \"SUP\" depends on thresholds that may vary by sub-"
|
||||
echo " configuration."
|
||||
echo " "
|
||||
echo " --disable-small-matrix-trsm, --enable-small-matrix-trsm"
|
||||
echo " "
|
||||
echo " Disable (enabled by default) handling of small/skinny"
|
||||
echo " TRSM problems via small code branches. When disabled,"
|
||||
echo " these small/skinny level-3 operations will be performed by"
|
||||
echo " the conventional implementation, which is optimized for"
|
||||
echo " medium and large problems. Note that what qualifies as"
|
||||
echo " \"small\" depends on thresholds that may vary by sub-"
|
||||
echo " configuration."
|
||||
echo " Currently only of relevance on configs that include"
|
||||
echo " AMD Zen sub-configs"
|
||||
echo " "
|
||||
echo " -a NAME --enable-addon=NAME"
|
||||
echo " "
|
||||
@@ -2108,9 +2151,15 @@ main()
|
||||
enable_cblas='no'
|
||||
enable_mixed_dt='yes'
|
||||
enable_mixed_dt_extra_mem='yes'
|
||||
|
||||
enable_mnk1_matrix='yes'
|
||||
enable_tiny_matrix='yes'
|
||||
enable_small_matrix='yes'
|
||||
enable_sup_handling='yes'
|
||||
enable_memkind='' # The default memkind value is determined later on.
|
||||
enable_small_matrix_trsm='yes'
|
||||
enable_trsm_preinversion='yes'
|
||||
|
||||
enable_memkind='' # The default memkind value is determined later on.
|
||||
enable_aocl_dynamic='yes'
|
||||
force_version='no'
|
||||
complex_return='default'
|
||||
@@ -2319,12 +2368,36 @@ main()
|
||||
disable-mixed-dt-extra-mem)
|
||||
enable_mixed_dt_extra_mem='no'
|
||||
;;
|
||||
enable-mnk1-matrix)
|
||||
enable_mnk1_matrix='yes'
|
||||
;;
|
||||
disable-mnk1-matrix)
|
||||
enable_mnk1_matrix='no'
|
||||
;;
|
||||
enable-tiny-matrix)
|
||||
enable_tiny_matrix='yes'
|
||||
;;
|
||||
disable-tiny-matrix)
|
||||
enable_tiny_matrix='no'
|
||||
;;
|
||||
enable-small-matrix)
|
||||
enable_small_matrix='yes'
|
||||
;;
|
||||
disable-small-matrix)
|
||||
enable_small_matrix='no'
|
||||
;;
|
||||
enable-sup-handling)
|
||||
enable_sup_handling='yes'
|
||||
;;
|
||||
disable-sup-handling)
|
||||
enable_sup_handling='no'
|
||||
;;
|
||||
enable-small-matrix-trsm)
|
||||
enable_small_matrix_trsm='yes'
|
||||
;;
|
||||
disable-small-matrix-trsm)
|
||||
enable_small_matrix_trsm='no'
|
||||
;;
|
||||
with-memkind)
|
||||
enable_memkind='yes'
|
||||
;;
|
||||
@@ -3213,13 +3286,41 @@ main()
|
||||
enable_mixed_dt_extra_mem_01=0
|
||||
enable_mixed_dt_01=0
|
||||
fi
|
||||
if [ "x${enable_sup_handling}" = "xyes" ]; then
|
||||
if [ "x${enable_mnk1_matrix}" = "xyes" ]; then
|
||||
echo "${script_name}: M,N,K=1 matrix handling is enabled."
|
||||
enable_mnk1_matrix_01=1
|
||||
else
|
||||
echo "${script_name}: M,N,K=1 matrix handling is disabled."
|
||||
enable_mnk1_matrix_01=0
|
||||
fi
|
||||
if [ "x${enable_tiny_matrix}" = "xyes" ]; then
|
||||
echo "${script_name}: tiny matrix handling is enabled."
|
||||
enable_tiny_matrix_01=1
|
||||
else
|
||||
echo "${script_name}: tiny matrix handling is disabled."
|
||||
enable_tiny_matrix_01=0
|
||||
fi
|
||||
if [ "x${enable_small_matrix}" = "xyes" ]; then
|
||||
echo "${script_name}: small matrix handling is enabled."
|
||||
enable_sup_handling_01=1
|
||||
enable_small_matrix_01=1
|
||||
else
|
||||
echo "${script_name}: small matrix handling is disabled."
|
||||
enable_small_matrix_01=0
|
||||
fi
|
||||
if [ "x${enable_sup_handling}" = "xyes" ]; then
|
||||
echo "${script_name}: SUP matrix handling is enabled."
|
||||
enable_sup_handling_01=1
|
||||
else
|
||||
echo "${script_name}: SUP matrix handling is disabled."
|
||||
enable_sup_handling_01=0
|
||||
fi
|
||||
if [ "x${enable_small_matrix_trsm}" = "xyes" ]; then
|
||||
echo "${script_name}: TRSM small matrix handling is enabled."
|
||||
enable_small_matrix_trsm_01=1
|
||||
else
|
||||
echo "${script_name}: TRSM small matrix handling is disabled."
|
||||
enable_small_matrix_trsm_01=0
|
||||
fi
|
||||
if [ "x${enable_trsm_preinversion}" = "xyes" ]; then
|
||||
echo "${script_name}: trsm diagonal element pre-inversion is enabled."
|
||||
enable_trsm_preinversion_01=1
|
||||
@@ -3586,9 +3687,13 @@ main()
|
||||
| sed -e "s/@enable_cblas@/${enable_cblas_01}/g" \
|
||||
| sed -e "s/@enable_mixed_dt@/${enable_mixed_dt_01}/g" \
|
||||
| sed -e "s/@enable_mixed_dt_extra_mem@/${enable_mixed_dt_extra_mem_01}/g" \
|
||||
| sed -e "s/@enable_mnk1_matrix@/${enable_mnk1_matrix_01}/g" \
|
||||
| sed -e "s/@enable_tiny_matrix@/${enable_tiny_matrix_01}/g" \
|
||||
| sed -e "s/@enable_small_matrix@/${enable_small_matrix_01}/g" \
|
||||
| sed -e "s/@enable_sup_handling@/${enable_sup_handling_01}/g" \
|
||||
| sed -e "s/@enable_memkind@/${enable_memkind_01}/g" \
|
||||
| sed -e "s/@enable_small_matrix_trsm@/${enable_small_matrix_trsm_01}/g" \
|
||||
| sed -e "s/@enable_trsm_preinversion@/${enable_trsm_preinversion_01}/g" \
|
||||
| sed -e "s/@enable_memkind@/${enable_memkind_01}/g" \
|
||||
| sed -e "s/@enable_aocl_dynamic@/${enable_aocl_dynamic_01}/g" \
|
||||
| sed -e "s/@enable_pragma_omp_simd@/${enable_pragma_omp_simd_01}/g" \
|
||||
| sed -e "s/@enable_sandbox@/${enable_sandbox_01}/g" \
|
||||
|
||||
@@ -53,4 +53,4 @@ err_t PASTEMAC( ch, tfuncname ) \
|
||||
); \
|
||||
|
||||
GENTFUNC( scomplex, c, gemm_tiny )
|
||||
GENTFUNC( dcomplex, z, gemm_tiny )
|
||||
GENTFUNC( dcomplex, z, gemm_tiny )
|
||||
|
||||
@@ -285,6 +285,7 @@ void PASTEF77S(ch,blasname) \
|
||||
const inc_t rs_c = 1; \
|
||||
const inc_t cs_c = *ldc; \
|
||||
\
|
||||
IF_BLIS_ENABLE_MNK1_MATRIX(\
|
||||
if( n0 == 1 ) \
|
||||
{ \
|
||||
if(bli_is_notrans(blis_transa)) \
|
||||
@@ -357,6 +358,7 @@ void PASTEF77S(ch,blasname) \
|
||||
bli_finalize_auto(); \
|
||||
return; \
|
||||
} \
|
||||
) /* End of IF_BLIS_ENABLE_MNK1_MATRIX */ \
|
||||
\
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
\
|
||||
|
||||
@@ -304,6 +304,7 @@ void PASTEF77S(ch,blasname) \
|
||||
const inc_t rs_c = 1; \
|
||||
const inc_t cs_c = *ldc; \
|
||||
\
|
||||
IF_BLIS_ENABLE_MNK1_MATRIX(\
|
||||
if( n0 == 1 ) \
|
||||
{ \
|
||||
if(bli_is_notrans(blis_transa)) \
|
||||
@@ -380,6 +381,7 @@ void PASTEF77S(ch,blasname) \
|
||||
bli_finalize_auto(); \
|
||||
return; \
|
||||
} \
|
||||
) /* End of IF_BLIS_ENABLE_MNK1_MATRIX */ \
|
||||
\
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
\
|
||||
@@ -534,6 +536,8 @@ void dgemm_blis_impl
|
||||
const inc_t rs_c = 1;
|
||||
const inc_t cs_c = *ldc;
|
||||
|
||||
#ifdef BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
/* Call GEMV when m == 1 or n == 1 with the context set
|
||||
to an uninitialized void pointer i.e. ((void *)0)*/
|
||||
if (n0 == 1)
|
||||
@@ -615,6 +619,8 @@ void dgemm_blis_impl
|
||||
return;
|
||||
}
|
||||
|
||||
#endif // End of BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
// This function is invoked on all architectures including 'generic'.
|
||||
// Non-AVX2+FMA3 platforms will use the kernels derived from the context.
|
||||
if (bli_cpuid_is_avx2fma3_supported() == FALSE)
|
||||
@@ -665,6 +671,8 @@ void dgemm_blis_impl
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
/*
|
||||
Invoking the API for input sizes with k = 1.
|
||||
- The API is single-threaded.
|
||||
@@ -715,6 +723,9 @@ void dgemm_blis_impl
|
||||
}
|
||||
}
|
||||
|
||||
#endif // End of BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
#ifdef BLIS_ENABLE_TINY_MATRIX
|
||||
/**
|
||||
*Early check for tiny sizes.
|
||||
*if inputs are in range of tiny gemm kernel,
|
||||
@@ -746,6 +757,7 @@ void dgemm_blis_impl
|
||||
bli_finalize_auto();
|
||||
return;
|
||||
}
|
||||
#endif // End of BLIS_ENABLE_TINY_MATRIX
|
||||
|
||||
const num_t dt = BLIS_DOUBLE;
|
||||
|
||||
@@ -859,8 +871,9 @@ void dgemm_blis_impl
|
||||
}
|
||||
}
|
||||
|
||||
#endif //#ifdef BLIS_ENABLE_SMALL_MATRIX
|
||||
#endif // End of BLIS_ENABLE_SMALL_MATRIX
|
||||
|
||||
#ifdef BLIS_ENABLE_SUP_HANDLING
|
||||
err_t sup_status = BLIS_FAILURE;
|
||||
sup_status = bli_gemmsup(&alphao, &ao, &bo, &betao, &co, NULL, NULL);
|
||||
if ( sup_status == BLIS_SUCCESS )
|
||||
@@ -871,6 +884,7 @@ void dgemm_blis_impl
|
||||
bli_finalize_auto();
|
||||
return;
|
||||
}
|
||||
#endif // End of BLIS_ENABLE_SUP_HANDLING
|
||||
|
||||
// fall back on native path when dgemm is not handled in sup path.
|
||||
//bli_gemmnat(&alphao, &ao, &bo, &betao, &co, NULL, NULL);
|
||||
@@ -1012,6 +1026,8 @@ void zgemm_blis_impl
|
||||
const inc_t rs_c = 1;
|
||||
const inc_t cs_c = *ldc;
|
||||
|
||||
#ifdef BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
/* Call GEMV when m == 1 or n == 1 with the context set
|
||||
to an uninitialized void pointer i.e. ((void *)0)*/
|
||||
if (n0 == 1)
|
||||
@@ -1093,6 +1109,8 @@ void zgemm_blis_impl
|
||||
return;
|
||||
}
|
||||
|
||||
#endif // End of BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
// This function is invoked on all architectures including 'generic'.
|
||||
// Non-AVX2+FMA3 platforms will use the kernels derived from the context.
|
||||
if (bli_cpuid_is_avx2fma3_supported() == FALSE)
|
||||
@@ -1143,6 +1161,8 @@ void zgemm_blis_impl
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
/*
|
||||
Invoking the API for input sizes with k = 1.
|
||||
- The API is single-threaded.
|
||||
@@ -1240,6 +1260,11 @@ void zgemm_blis_impl
|
||||
}
|
||||
}
|
||||
|
||||
#endif // End of BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
#ifdef BLIS_ENABLE_TINY_MATRIX
|
||||
|
||||
// May also be used in small path below
|
||||
bool is_parallel = bli_thread_get_is_parallel(); // Check if parallel zgemm is invoked.
|
||||
|
||||
// Tiny gemm dispatch
|
||||
@@ -1271,6 +1296,8 @@ void zgemm_blis_impl
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // End of BLIS_ENABLE_TINY_MATRIX
|
||||
|
||||
const num_t dt = BLIS_DCOMPLEX;
|
||||
|
||||
obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1;
|
||||
@@ -1297,6 +1324,11 @@ void zgemm_blis_impl
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX
|
||||
|
||||
/* Check if we have already defined this above */
|
||||
#ifndef BLIS_ENABLE_TINY_MATRIX
|
||||
bool is_parallel = bli_thread_get_is_parallel(); // Check if parallel zgemm is invoked.
|
||||
#endif
|
||||
|
||||
/* Query the architecture ID */
|
||||
arch_t arch_id = bli_arch_query_id();
|
||||
|
||||
@@ -1389,8 +1421,9 @@ void zgemm_blis_impl
|
||||
}
|
||||
}
|
||||
|
||||
#endif //#ifdef BLIS_ENABLE_SMALL_MATRIX
|
||||
#endif // End of BLIS_ENABLE_SMALL_MATRIX
|
||||
|
||||
#ifdef BLIS_ENABLE_SUP_HANDLING
|
||||
err_t sup_status = BLIS_FAILURE;
|
||||
sup_status = bli_gemmsup(&alphao, &ao, &bo, &betao, &co, NULL, NULL);
|
||||
if ( sup_status == BLIS_SUCCESS )
|
||||
@@ -1401,6 +1434,7 @@ void zgemm_blis_impl
|
||||
bli_finalize_auto();
|
||||
return;
|
||||
}
|
||||
#endif // End of BLIS_ENABLE_SUP_HANDLING
|
||||
|
||||
// fall back on native path when zgemm is not handled in sup path.
|
||||
//bli_gemmnat(&alphao, &ao, &bo, &betao, &co, NULL, NULL);
|
||||
@@ -1555,6 +1589,8 @@ void cgemm_blis_impl
|
||||
const inc_t rs_c = 1;
|
||||
const inc_t cs_c = *ldc;
|
||||
|
||||
#ifdef BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
/* Call GEMV when m == 1 or n == 1 with the context set
|
||||
to an uninitialized void pointer i.e. ((void *)0)*/
|
||||
if (n0 == 1)
|
||||
@@ -1636,6 +1672,8 @@ void cgemm_blis_impl
|
||||
return;
|
||||
}
|
||||
|
||||
#endif // End of BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
// This function is invoked on all architectures including 'generic'.
|
||||
// Non-AVX2+FMA3 platforms will use the kernels derived from the context.
|
||||
if (bli_cpuid_is_avx2fma3_supported() == FALSE)
|
||||
@@ -1686,6 +1724,8 @@ void cgemm_blis_impl
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
/*
|
||||
Invoking the API for input sizes with k = 1.
|
||||
- The API is single-threaded.
|
||||
@@ -1719,6 +1759,10 @@ void cgemm_blis_impl
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // End of BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
#ifdef BLIS_ENABLE_TINY_MATRIX
|
||||
|
||||
bool is_parallel = bli_thread_get_is_parallel(); // Check if parallel cgemm is invoked.
|
||||
|
||||
// Tiny gemm dispatch
|
||||
@@ -1750,6 +1794,8 @@ void cgemm_blis_impl
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // End of BLIS_ENABLE_TINY_MATRIX
|
||||
|
||||
const num_t dt = BLIS_SCOMPLEX;
|
||||
|
||||
obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1;
|
||||
@@ -1774,6 +1820,7 @@ void cgemm_blis_impl
|
||||
bli_obj_set_conjtrans( blis_transa, &ao );
|
||||
bli_obj_set_conjtrans( blis_transb, &bo );
|
||||
|
||||
#ifdef BLIS_ENABLE_SUP_HANDLING
|
||||
err_t sup_status = BLIS_FAILURE;
|
||||
sup_status = bli_gemmsup(&alphao, &ao, &bo, &betao, &co, NULL, NULL);
|
||||
if ( sup_status == BLIS_SUCCESS )
|
||||
@@ -1784,6 +1831,7 @@ void cgemm_blis_impl
|
||||
bli_finalize_auto();
|
||||
return;
|
||||
}
|
||||
#endif // End of BLIS_ENABLE_SUP_HANDLING
|
||||
|
||||
// fall back on native path when cgemm is not handled in sup path.
|
||||
//bli_gemmnat(&alphao, &ao, &bo, &betao, &co, NULL, NULL);
|
||||
|
||||
@@ -317,6 +317,8 @@ void dgemv_blis_impl
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_TINY_MATRIX
|
||||
|
||||
/**
|
||||
* DGEMV Tiny Path
|
||||
* If the matrix dimensions are within 8x8 then calculate the result
|
||||
@@ -341,6 +343,8 @@ void dgemv_blis_impl
|
||||
return;
|
||||
}
|
||||
|
||||
#endif // End of BLIS_ENABLE_TINY_MATRIX
|
||||
|
||||
/* Call variants based on transpose value. */
|
||||
if((bli_does_notrans(blis_transa) && bli_is_col_stored( rs_a, cs_a ))
|
||||
|| (bli_does_trans(blis_transa) && bli_is_row_stored( rs_a, cs_a )))
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019 - 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -247,6 +247,7 @@ void PASTEF77S(ch,blasname) \
|
||||
/* If Transpose(A) uplo = higher then uplo = lower */ \
|
||||
/* ----------------------------------------------------------- */ \
|
||||
\
|
||||
IF_BLIS_ENABLE_MNK1_MATRIX(\
|
||||
if( n0 == 1 ) \
|
||||
{ \
|
||||
if( blis_side == BLIS_LEFT ) \
|
||||
@@ -375,6 +376,7 @@ void PASTEF77S(ch,blasname) \
|
||||
return; \
|
||||
} \
|
||||
} \
|
||||
) /* End of IF_BLIS_ENABLE_MNK1_MATRIX */ \
|
||||
\
|
||||
const struc_t struca = BLIS_TRIANGULAR; \
|
||||
\
|
||||
|
||||
@@ -308,6 +308,7 @@ void PASTEF77S(ch,blasname) \
|
||||
/* If Transpose(A) uplo = higher then uplo = lower */ \
|
||||
/* ----------------------------------------------------------- */ \
|
||||
\
|
||||
IF_BLIS_ENABLE_MNK1_MATRIX(\
|
||||
if( n0 == 1 ) \
|
||||
{ \
|
||||
if( blis_side == BLIS_LEFT ) \
|
||||
@@ -442,6 +443,7 @@ void PASTEF77S(ch,blasname) \
|
||||
return; \
|
||||
} \
|
||||
} \
|
||||
) /* End of IF_BLIS_ENABLE_MNK1_MATRIX */ \
|
||||
\
|
||||
const struc_t struca = BLIS_TRIANGULAR; \
|
||||
\
|
||||
@@ -588,6 +590,8 @@ void strsm_blis_impl
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
if( n0 == 1 )
|
||||
{
|
||||
if( blis_side == BLIS_LEFT )
|
||||
@@ -732,6 +736,9 @@ void strsm_blis_impl
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
#endif // End of BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
const struc_t struca = BLIS_TRIANGULAR;
|
||||
|
||||
obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1;
|
||||
@@ -789,7 +796,7 @@ void strsm_blis_impl
|
||||
}
|
||||
} // bli_cpuid_is_avx2fma3_supported
|
||||
|
||||
#endif //#ifdef BLIS_ENABLE_SMALL_MATRIX
|
||||
#endif // End of BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
|
||||
//bli_trsmnat
|
||||
//(
|
||||
@@ -932,6 +939,8 @@ void dtrsm_blis_impl
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
if( n0 == 1 )
|
||||
{
|
||||
if( blis_side == BLIS_LEFT )
|
||||
@@ -1077,6 +1086,8 @@ void dtrsm_blis_impl
|
||||
}
|
||||
}
|
||||
|
||||
#endif // End of BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
const struc_t struca = BLIS_TRIANGULAR;
|
||||
|
||||
obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1;
|
||||
@@ -1287,7 +1298,7 @@ void dtrsm_blis_impl
|
||||
}
|
||||
} // bli_cpuid_is_avx2fma3_supported
|
||||
|
||||
#endif //#ifdef BLIS_ENABLE_SMALL_MATRIX
|
||||
#endif // End of BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
|
||||
//bli_trsmnat
|
||||
//(
|
||||
@@ -1431,6 +1442,8 @@ void ztrsm_blis_impl
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
if( n0 == 1 )
|
||||
{
|
||||
if( blis_side == BLIS_LEFT )
|
||||
@@ -1635,6 +1648,8 @@ void ztrsm_blis_impl
|
||||
}
|
||||
}
|
||||
|
||||
#endif // End of BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
const struc_t struca = BLIS_TRIANGULAR;
|
||||
|
||||
obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1;
|
||||
@@ -1810,7 +1825,7 @@ void ztrsm_blis_impl
|
||||
}
|
||||
} // bli_cpuid_is_avx2fma3_supported
|
||||
|
||||
#endif //#ifdef BLIS_ENABLE_SMALL_MATRIX
|
||||
#endif // End of BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
|
||||
//bli_trsmnat
|
||||
//(
|
||||
@@ -1954,6 +1969,8 @@ void ctrsm_blis_impl
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
if( n0 == 1 )
|
||||
{
|
||||
if( blis_side == BLIS_LEFT )
|
||||
@@ -2158,6 +2175,8 @@ void ctrsm_blis_impl
|
||||
}
|
||||
}
|
||||
|
||||
#endif // End of BLIS_ENABLE_MNK1_MATRIX
|
||||
|
||||
const struc_t struca = BLIS_TRIANGULAR;
|
||||
|
||||
obj_t alphao = BLIS_OBJECT_INITIALIZER_1X1;
|
||||
@@ -2215,7 +2234,7 @@ void ctrsm_blis_impl
|
||||
}
|
||||
} // bli_cpuid_is_avx2fma3_supported
|
||||
|
||||
#endif //#ifdef BLIS_ENABLE_SMALL_MATRIX
|
||||
#endif // End of BLIS_ENABLE_SMALL_MATRIX_TRSM
|
||||
|
||||
//bli_trsmnat
|
||||
//(
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2019 - 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -269,3 +269,21 @@
|
||||
|
||||
#endif
|
||||
|
||||
// -- CODE PATH ENABLEMENT --------------------------------------------------
|
||||
#ifdef BLIS_ENABLE_MNK1_MATRIX
|
||||
#define IF_BLIS_ENABLE_MNK1_MATRIX(...) __VA_ARGS__
|
||||
#else
|
||||
#define IF_BLIS_ENABLE_MNK1_MATRIX(...)
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_TINY_MATRIX
|
||||
#define IF_BLIS_ENABLE_TINY_MATRIX(...) __VA_ARGS__
|
||||
#else
|
||||
#define IF_BLIS_ENABLE_TINY_MATRIX(...)
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_SMALL_MATRIX
|
||||
#define IF_BLIS_ENABLE_SMALL_MATRIX(...) __VA_ARGS__
|
||||
#else
|
||||
#define IF_BLIS_ENABLE_SMALL_MATRIX(...)
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user