AVX512 optimizations for CGEMM(SUP)

- Implemented the following AVX512 SUP
  column-preferential kernels (m-variant) for CGEMM:
  Main kernel    : 24x4m
  Fringe kernels : 24x3m, 24x2m, 24x1m,
                   16x4, 16x3, 16x2, 16x1,
                   8x4, 8x3, 8x2, 8x1,
                   fx4, fx3, fx2, fx1 (where 0 < f < 8).

- Utilized the packing kernel to pack A when
  handling inputs with the CRC storage scheme. This
  would, in turn, also handle RRC via the operation
  transpose in the framework layer.

- Further added C prefetching to the main kernel,
  and updated the cache-blocking parameters for the
  ZEN4 and ZEN5 contexts.

- Added decision logic to choose between the
  SUP and native AVX512 code paths for the ZEN4 and
  ZEN5 architectures.

- Updated the testing interface for complex GEMMSUP
  to accept the kernel dimension (MR) as a parameter, in
  order to set the appropriate panel stride for functional
  and memory testing. Also updated the existing instantiators
  to pass their kernel dimensions as a parameter.

- Added unit tests for functional and memory testing of these
  newly added kernels.

AMD-Internal: [CPUPL-6498]

Change-Id: Ie79d3d0dc7eed7edf30d8d4f74b888135f31d6b4
This commit is contained in:
Vignesh Balasubramanian
2025-03-03 19:04:03 +05:30
committed by Vignesh Balasubramanian
parent 8998839c71
commit 07df9f471e
11 changed files with 9046 additions and 321 deletions

View File

@@ -317,12 +317,12 @@ void bli_cntx_init_zen4( cntx_t* cntx )
BLIS_CCR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x64n_avx512, TRUE,
BLIS_CCC, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x64n_avx512, TRUE,
BLIS_RRR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, TRUE,
BLIS_RCR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, TRUE,
BLIS_CRR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, TRUE,
BLIS_RCC, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, TRUE,
BLIS_CCR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, TRUE,
BLIS_CCC, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, TRUE,
BLIS_RRR, BLIS_SCOMPLEX, bli_cgemmsup_cv_zen4_asm_24x4m, FALSE,
BLIS_RCR, BLIS_SCOMPLEX, bli_cgemmsup_cv_zen4_asm_24x4m, FALSE,
BLIS_CRR, BLIS_SCOMPLEX, bli_cgemmsup_cv_zen4_asm_24x4m, FALSE,
BLIS_RCC, BLIS_SCOMPLEX, bli_cgemmsup_cv_zen4_asm_24x4m, FALSE,
BLIS_CCR, BLIS_SCOMPLEX, bli_cgemmsup_cv_zen4_asm_24x4m, FALSE,
BLIS_CCC, BLIS_SCOMPLEX, bli_cgemmsup_cv_zen4_asm_24x4m, FALSE,
BLIS_RRR, BLIS_DCOMPLEX, bli_zgemmsup_cv_zen4_asm_12x4m, FALSE,
BLIS_RRC, BLIS_DCOMPLEX, bli_zgemmsup_cd_zen4_asm_12x4m, FALSE,
@@ -338,12 +338,12 @@ void bli_cntx_init_zen4( cntx_t* cntx )
// Initialize level-3 sup blocksize objects with architecture-specific
// values.
// s d c z
bli_blksz_init ( &blkszs[ BLIS_MR ], 6, 24, 3, 12,
6, 9, 3, 12 );
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 64, 8, 8, 4 );
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 192, 144, 72, 48 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 512, 480, 128, 64 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8064, 4080, 2040, 1020 );
bli_blksz_init ( &blkszs[ BLIS_MR ], 6, 24, 24, 12,
6, 9, 24, 12 );
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 64, 8, 4, 4 );
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 192, 144, 120, 48 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 512, 480, 512, 64 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8064, 4080, 4080, 1020 );
// Update the context with the current architecture's register and cache
// blocksizes for small/unpacked level-3 problems.

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2019 - 2024, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -114,7 +114,7 @@ err_t bli_gemmsup
if(( id == BLIS_ARCH_ZEN5 ) || ( id == BLIS_ARCH_ZEN4 ))
{
if(( bli_obj_dt(a) == BLIS_DOUBLE ))
if(( bli_obj_dt(a) == BLIS_DOUBLE ) || ( bli_obj_dt(a) == BLIS_SCOMPLEX ))
{
// Pack A to avoid RD kernels.
if((stor_id == BLIS_CRC || stor_id == BLIS_RRC))

View File

@@ -4,7 +4,7 @@
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2019 - 2023, Advanced Micro Devices, Inc. All rights reserved.
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -198,7 +198,7 @@ BLIS_INLINE void bli_gemmsup_ref_var1n2m_opt_cases
}
else
{
if ( ( dt == BLIS_DOUBLE ) || ( dt == BLIS_DCOMPLEX ) )
if ( ( dt == BLIS_DOUBLE ) || ( dt == BLIS_DCOMPLEX ) || ( dt == BLIS_SCOMPLEX ) )
{
// The optimizations are only done for CRC and RRC storage schemes to avoid RD kernels.
// Optimizations for other storage schemes is yet to be done.

View File

@@ -40,6 +40,22 @@
#ifdef AOCL_DEV
#define K_bli_cgemmsup_cv_zen4_asm_24x4m 1
#define K_bli_cgemmsup_cv_zen4_asm_24x3m 1
#define K_bli_cgemmsup_cv_zen4_asm_24x2m 1
#define K_bli_cgemmsup_cv_zen4_asm_24x1m 1
#define K_bli_cgemmsup_cv_zen4_asm_16x4 1
#define K_bli_cgemmsup_cv_zen4_asm_16x3 1
#define K_bli_cgemmsup_cv_zen4_asm_16x2 1
#define K_bli_cgemmsup_cv_zen4_asm_16x1 1
#define K_bli_cgemmsup_cv_zen4_asm_8x4 1
#define K_bli_cgemmsup_cv_zen4_asm_8x3 1
#define K_bli_cgemmsup_cv_zen4_asm_8x2 1
#define K_bli_cgemmsup_cv_zen4_asm_8x1 1
#define K_bli_cgemmsup_cv_zen4_asm_fx4 1
#define K_bli_cgemmsup_cv_zen4_asm_fx3 1
#define K_bli_cgemmsup_cv_zen4_asm_fx2 1
#define K_bli_cgemmsup_cv_zen4_asm_fx1 1
#define K_bli_cgemm_zen4_asm_24x4 1
#define K_bli_cgemm_zen4_asm_4x24 1
#define K_bli_zgemmsup_cd_zen4_asm_12x2m 1

File diff suppressed because it is too large Load Diff

View File

@@ -73,7 +73,7 @@
/**********************************************************************/
template<typename T, typename FT>
static void test_complex_gemmsup_ukr( char storage, char trnsa, char trnsb, gtint_t m, gtint_t n, gtint_t k, T alpha, T beta, double thresh, FT ukr_fp, bool is_memory_test = false )
static void test_complex_gemmsup_ukr( char storage, char trnsa, char trnsb, gtint_t m, gtint_t n, gtint_t k, gtint_t MR, T alpha, T beta, double thresh, FT ukr_fp, bool is_memory_test = false )
{
// Compute the leading dimensions of a, b, and c.
gtint_t lda = testinghelpers::get_leading_dimension( storage, trnsa, m, k, 0 );
@@ -161,7 +161,7 @@ static void test_complex_gemmsup_ukr( char storage, char trnsa, char trnsb, gtin
{
auxinfo_t data;
//Panel stride update is required only for zen4 sup kernels
inc_t ps_a_use = (12 * rs_a); //12 = MR
inc_t ps_a_use = (MR * rs_a);
bli_auxinfo_set_ps_a( ps_a_use, &data );
ukr_fp(
@@ -196,7 +196,7 @@ static void test_complex_gemmsup_ukr( char storage, char trnsa, char trnsb, gtin
// second call to ukr
auxinfo_t data;
inc_t ps_a_use = (12 * rs_a); //12 = MR
inc_t ps_a_use = (MR * rs_a);
bli_auxinfo_set_ps_a( ps_a_use, &data );
ukr_fp(

View File

@@ -49,6 +49,7 @@ class zgemmGenericSUP:
dcomplex, // beta
char, // storage of C matrix
zgemmsup_ker_ft, // Function pointer type for zgemm kernel
gtint_t, // MR/NR(to set panel stride based on m-var/n-var)
char, // transa
char, // transb
bool // is_memory_test
@@ -66,9 +67,10 @@ TEST_P( zgemmGenericSUP, UKR )
T beta = std::get<4>(GetParam()); // beta
char storageC = std::get<5>(GetParam()); // storage scheme for C matrix
zgemmsup_ker_ft kern_ptr = std::get<6>(GetParam()); // pointer to the gemm kernel
char transa = std::get<7>(GetParam()); // transa
char transb = std::get<8>(GetParam()); // transb
bool is_memory_test = std::get<9>(GetParam()); // is_memory_test
gtint_t MR = std::get<7>(GetParam()); // ukr dimension MR
char transa = std::get<8>(GetParam()); // transa
char transb = std::get<9>(GetParam()); // transb
bool is_memory_test = std::get<10>(GetParam()); // is_memory_test
// Set the threshold for the errors:
// Check gtestsuite gemm.h or netlib source code for reminder of the
@@ -92,14 +94,14 @@ TEST_P( zgemmGenericSUP, UKR )
#endif
thresh = adj*(3*k+1)*testinghelpers::getEpsilon<T>();
}
test_complex_gemmsup_ukr(storageC, transa, transb, m, n, k, alpha, beta, thresh, kern_ptr, is_memory_test);
test_complex_gemmsup_ukr(storageC, transa, transb, m, n, k, MR, alpha, beta, thresh, kern_ptr, is_memory_test);
}// end of function
class zgemmGenericSUPPrint {
public:
std::string operator()(
testing::TestParamInfo<std::tuple<gtint_t, gtint_t, gtint_t, dcomplex, dcomplex, char,
zgemmsup_ker_ft, char, char, bool>> str) const {
zgemmsup_ker_ft, gtint_t, char, char, bool>> str) const {
gtint_t m = std::get<0>(str.param);
gtint_t n = std::get<1>(str.param);
@@ -107,9 +109,9 @@ public:
dcomplex alpha = std::get<3>(str.param);
dcomplex beta = std::get<4>(str.param);
char storageC = std::get<5>(str.param);
char transa = std::get<7>(str.param);
char transb = std::get<8>(str.param);
bool is_memory_test = std::get<9>(str.param);
char transa = std::get<8>(str.param);
char transb = std::get<9>(str.param);
bool is_memory_test = std::get<10>(str.param);
std::string str_name;
str_name += "_stor_" + std::string(&storageC, 1);
@@ -141,6 +143,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -5.0}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_3x4m), // zgemm_sup kernel
::testing::Values(gtint_t(3)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -161,6 +164,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 5.0}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_2x4), // zgemm_sup kernel
::testing::Values(gtint_t(2)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -181,6 +185,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 5.4}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_1x4), // zgemm_sup kernel
::testing::Values(gtint_t(1)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -201,6 +206,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_3x2m), // zgemm_sup kernel
::testing::Values(gtint_t(3)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -221,6 +227,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 2.3}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_3x2), // zgemm_sup kernel
::testing::Values(gtint_t(3)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -241,6 +248,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 13}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_2x2), // zgemm_sup kernel
::testing::Values(gtint_t(2)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -261,6 +269,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 3}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_1x2), // zgemm_sup kernel
::testing::Values(gtint_t(1)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -281,6 +290,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_3x4m), // zgemm_sup kernel
::testing::Values(gtint_t(3)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -301,6 +311,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 3.9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_3x2m), // zgemm_sup kernel
::testing::Values(gtint_t(3)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -321,6 +332,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.4}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_3x2), // zgemm_sup kernel
::testing::Values(gtint_t(3)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -341,6 +353,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.99}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_2x4), // zgemm_sup kernel
::testing::Values(gtint_t(2)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -361,6 +374,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0},dcomplex{0.0, 1.9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_1x4), // zgemm_sup kernel
::testing::Values(gtint_t(1)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -381,6 +395,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -1.3}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_2x2), // zgemm_sup kernel
::testing::Values(gtint_t(2)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -401,6 +416,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 2.3}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_1x2), // zgemm_sup kernel
::testing::Values(gtint_t(1)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -421,6 +437,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 2.9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rd_zen_asm_3x4m), // zgemm_sup kernel
::testing::Values(gtint_t(3)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -441,6 +458,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.19}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rd_zen_asm_3x2m), // zgemm_sup kernel
::testing::Values(gtint_t(3)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -461,6 +479,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 2.9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rd_zen_asm_3x4n), // zgemm_sup kernel
::testing::Values(gtint_t(3)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -481,6 +500,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.23}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rd_zen_asm_2x4n), // zgemm_sup kernel
::testing::Values(gtint_t(2)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -501,6 +521,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 2.9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rd_zen_asm_2x4), // zgemm_sup kernel
::testing::Values(gtint_t(2)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -521,6 +542,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 21.9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rd_zen_asm_1x4), // zgemm_sup kernel
::testing::Values(gtint_t(1)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -541,6 +563,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -21.9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rd_zen_asm_1x2), // zgemm_sup kernel
::testing::Values(gtint_t(1)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -561,6 +584,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -2.3}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rd_zen_asm_2x2), // zgemm_sup kernel
::testing::Values(gtint_t(2)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -581,6 +605,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -3}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_3x4n), // zgemm_sup kernel
::testing::Values(gtint_t(3)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -601,6 +626,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -1.9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_2x4n), // zgemm_sup kernel
::testing::Values(gtint_t(2)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -621,6 +647,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 5.6}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_1x4n), // zgemm_sup kernel
::testing::Values(gtint_t(1)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('t'), // transb
::testing::Values(false, true) // is_memory_test
@@ -641,6 +668,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.3}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_3x4n), // zgemm_sup kernel
::testing::Values(gtint_t(3)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -661,6 +689,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_2x4n), // zgemm_sup kernel
::testing::Values(gtint_t(2)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -681,6 +710,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -1.3}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_rv_zen_asm_1x4n), // zgemm_sup kernel
::testing::Values(gtint_t(1)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -705,6 +735,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_12x4m), // zgemm_sup kernel
::testing::Values(gtint_t(12)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -725,6 +756,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_12x3m), // zgemm_sup kernel
::testing::Values(gtint_t(12)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -745,6 +777,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -21.9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_12x2m), // zgemm_sup kernel
::testing::Values(gtint_t(12)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -765,6 +798,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.4}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_12x1m), // zgemm_sup kernel
::testing::Values(gtint_t(12)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -785,6 +819,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 8}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_8x4), // zgemm_sup kernel
::testing::Values(gtint_t(8)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -805,6 +840,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -1.8}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_8x3), // zgemm_sup kernel
::testing::Values(gtint_t(8)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -825,6 +861,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_8x2), // zgemm_sup kernel
::testing::Values(gtint_t(8)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -845,6 +882,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -2}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_8x1), // zgemm_sup kernel
::testing::Values(gtint_t(8)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -865,6 +903,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_4x4), // zgemm_sup kernel
::testing::Values(gtint_t(4)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -885,6 +924,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.5}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_4x3), // zgemm_sup kernel
::testing::Values(gtint_t(4)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -905,6 +945,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_4x2), // zgemm_sup kernel
::testing::Values(gtint_t(4)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -925,6 +966,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_4x1), // zgemm_sup kernel
::testing::Values(gtint_t(4)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -945,6 +987,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.8}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_2x4), // zgemm_sup kernel
::testing::Values(gtint_t(2)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -965,6 +1008,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_2x3), // zgemm_sup kernel
::testing::Values(gtint_t(2)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -985,6 +1029,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_2x2), // zgemm_sup kernel
::testing::Values(gtint_t(2)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -1005,6 +1050,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_2x1), // zgemm_sup kernel
::testing::Values(gtint_t(2)), // Micro kernel block MR
::testing::Values('n'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -1025,6 +1071,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_12x4m), // zgemm_sup kernel
::testing::Values(gtint_t(12)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -1045,6 +1092,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.2}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_12x3m), // zgemm_sup kernel
::testing::Values(gtint_t(12)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -1065,6 +1113,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 8.9}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_12x2m), // zgemm_sup kernel
::testing::Values(gtint_t(12)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -1085,6 +1134,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 19}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('r'), // storage of c
::testing::Values(bli_zgemmsup_cv_zen4_asm_12x1m), // zgemm_sup kernel
::testing::Values(gtint_t(12)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -1113,6 +1163,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9.0}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cd_zen4_asm_12x4m), // zgemm_sup kernel
::testing::Values(gtint_t(12)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -1140,6 +1191,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9.0}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cd_zen4_asm_12x2m), // zgemm_sup kernel
::testing::Values(gtint_t(12)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -1164,6 +1216,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9.0}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cd_zen4_asm_8x4), // zgemm_sup kernel
::testing::Values(gtint_t(8)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -1188,6 +1241,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9.0}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cd_zen4_asm_8x2), // zgemm_sup kernel
::testing::Values(gtint_t(8)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -1212,6 +1266,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9.0}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cd_zen4_asm_4x4), // zgemm_sup kernel
::testing::Values(gtint_t(4)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -1236,6 +1291,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9.0}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cd_zen4_asm_4x2), // zgemm_sup kernel
::testing::Values(gtint_t(4)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -1260,6 +1316,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9.0}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cd_zen4_asm_2x4), // zgemm_sup kernel
::testing::Values(gtint_t(2)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test
@@ -1284,6 +1341,7 @@ INSTANTIATE_TEST_SUITE_P(
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9.0}, dcomplex{-7.3, 6.7}), // beta value
::testing::Values('c'), // storage of c
::testing::Values(bli_zgemmsup_cd_zen4_asm_2x2), // zgemm_sup kernel
::testing::Values(gtint_t(2)), // Micro kernel block MR
::testing::Values('t'), // transa
::testing::Values('n'), // transb
::testing::Values(false, true) // is_memory_test

File diff suppressed because it is too large Load Diff

View File

@@ -93,6 +93,48 @@ bool bli_cntx_gemmsup_thresh_is_met_zen4( obj_t* a, obj_t* b, obj_t* c, cntx_t*
if( ( m <= 216 ) && ( n <= 216 ) && ( k <= 216 ) ) return TRUE;
return FALSE;
}
else if( dt == BLIS_SCOMPLEX )
{
// Single-precision complex case: the decision is made from the m, n and k
// dimensions using the hand-written threshold tree below. This branch
// returns TRUE when one of the listed conditions holds and FALSE otherwise.
// NOTE(review): presumably TRUE selects the SUP code-path and FALSE the
// native one - confirm against the caller.

// k is obtained from A through bli_obj_width_after_trans().
dim_t k = bli_obj_width_after_trans( a );
dim_t m, n;
const stor3_t stor_id = bli_obj_stor3_from_strides( c, a, b );
// If the SUP kernel registered for this storage combination dislikes the
// storage of C, read m and n from C with their roles swapped (m from the
// width, n from the length); otherwise read them the usual way.
if ( bli_cntx_l3_sup_ker_dislikes_storage_of( c, stor_id, cntx ) )
{
m = bli_obj_width(c);
n = bli_obj_length(c);
}
else
{
m = bli_obj_length( c );
n = bli_obj_width( c );
}
// TRUE is returned when any one of the following holds:
//   - n <= 420
//   - 420 < n <= 540 and m <= 1260
//   - n > 540 and m <= 180
//   - n > 540 and 180 < m <= 420 and n <= 2100
//   - n > 540 and m > 420 and k <= 540 and ( n <= 1260 or m <= 900 )
// Every other combination falls through to the FALSE return at the end.
// Note that k is only consulted in the last case.
if( n <= 540 )
{
if( n <= 420 ) return TRUE;
else if( m <= 1260 ) return TRUE;
}
else
{
if( m <= 420 )
{
if( m <= 180 ) return TRUE;
else if( n <= 2100 ) return TRUE;
}
else
{
if( k <= 540 )
{
if( n <= 1260 ) return TRUE;
else if( m <= 900 ) return TRUE;
}
}
}
// None of the conditions above matched.
return FALSE;
}
else
return bli_cntx_l3_sup_thresh_is_met( a, b, c, cntx );
}

View File

@@ -332,6 +332,24 @@ GEMMSUP_KER_PROT( double, d, gemmsup_rv_zen4_asm_24x1)
GEMMSUP_KER_PROT( double, d, gemmsup_rv_zen4_asm_16x1)
GEMMSUP_KER_PROT( double, d, gemmsup_rv_zen4_asm_8x1)
// Cgemm sup CV kernels
// Prototypes for the scomplex SUP kernels, declared via GEMMSUP_KER_PROT.
// Per the accompanying change description: 24x4m is the main kernel;
// 24x3m, 24x2m, 24x1m, 16xN, 8xN and fxN (where 0 < f < 8) are the
// fringe kernels, with N in {4, 3, 2, 1}.
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_24x4m )
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_24x3m )
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_24x2m )
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_24x1m )
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_16x4 )
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_16x3 )
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_16x2 )
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_16x1 )
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_8x4 )
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_8x3 )
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_8x2 )
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_8x1 )
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_fx4 )
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_fx3 )
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_fx2 )
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_fx1 )
// Zgemm sup CV kernels
GEMMSUP_KER_PROT( dcomplex, z, gemmsup_cv_zen4_asm_12x4m )
GEMMSUP_KER_PROT( dcomplex, z, gemmsup_cv_zen4_asm_12x3m )

View File

@@ -93,6 +93,48 @@ bool bli_cntx_gemmsup_thresh_is_met_zen5( obj_t* a, obj_t* b, obj_t* c, cntx_t*
if( ( m <= 216 ) && ( n <= 216 ) && ( k <= 216 ) ) return TRUE;
return FALSE;
}
else if( dt == BLIS_SCOMPLEX )
{
// Single-precision complex case: the decision is made from the m, n and k
// dimensions using the hand-written threshold tree below. This branch
// returns TRUE when one of the listed conditions holds and FALSE otherwise.
// NOTE(review): presumably TRUE selects the SUP code-path and FALSE the
// native one - confirm against the caller.

// k is obtained from A through bli_obj_width_after_trans().
dim_t k = bli_obj_width_after_trans( a );
dim_t m, n;
const stor3_t stor_id = bli_obj_stor3_from_strides( c, a, b );
// If the SUP kernel registered for this storage combination dislikes the
// storage of C, read m and n from C with their roles swapped (m from the
// width, n from the length); otherwise read them the usual way.
if ( bli_cntx_l3_sup_ker_dislikes_storage_of( c, stor_id, cntx ) )
{
m = bli_obj_width(c);
n = bli_obj_length(c);
}
else
{
m = bli_obj_length( c );
n = bli_obj_width( c );
}
// TRUE is returned when any one of the following holds:
//   - n <= 420
//   - 420 < n <= 540 and m <= 1260
//   - n > 540 and m <= 180
//   - n > 540 and 180 < m <= 420 and n <= 2100
//   - n > 540 and m > 420 and k <= 540 and ( n <= 1260 or m <= 900 )
// Every other combination falls through to the FALSE return at the end.
// Note that k is only consulted in the last case.
if( n <= 540 )
{
if( n <= 420 ) return TRUE;
else if( m <= 1260 ) return TRUE;
}
else
{
if( m <= 420 )
{
if( m <= 180 ) return TRUE;
else if( n <= 2100 ) return TRUE;
}
else
{
if( k <= 540 )
{
if( n <= 1260 ) return TRUE;
else if( m <= 900 ) return TRUE;
}
}
}
// None of the conditions above matched.
return FALSE;
}
else
return bli_cntx_l3_sup_thresh_is_met( a, b, c, cntx );
}