mirror of
https://github.com/amd/blis.git
synced 2026-04-20 07:38:53 +00:00
AVX512 optimizations for CGEMM(SUP)
- Implemented the following AVX512 SUP
column-preferential kernels(m-variant) for CGEMM :
Main kernel : 24x4m
Fringe kernels : 24x3m, 24x2m, 24x1m,
16x4, 16x3, 16x2, 16x1,
8x4, 8x3, 8x2, 8x1,
fx4, fx3, fx2, fx1(where 0<f<8).
- Utilized the packing kernel to pack A when
handling inputs with CRC storage scheme. This
would in turn handle RRC with operation transpose
in the framework layer.
- Further added C prefetching to the main kernel,
and updated the cache-blocking parameters for
ZEN4 and ZEN5 contexts.
- Added decision logic to choose between
SUP and Native AVX512 code-paths for ZEN4 and ZEN5
architectures.
- Updated the testing interface for complex GEMMSUP
to accept the kernel dimension(MR) as a parameter, in
order to set the appropriate panel stride for functional
and memory testing. Also updated the existing instantiators
to send their kernel dimensions as a parameter.
- Added unit tests for functional and memory testing of these
newly added kernels.
AMD-Internal: [CPUPL-6498]
Change-Id: Ie79d3d0dc7eed7edf30d8d4f74b888135f31d6b4
This commit is contained in:
committed by
Vignesh Balasubramanian
parent
8998839c71
commit
07df9f471e
@@ -317,12 +317,12 @@ void bli_cntx_init_zen4( cntx_t* cntx )
|
||||
BLIS_CCR, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x64n_avx512, TRUE,
|
||||
BLIS_CCC, BLIS_FLOAT, bli_sgemmsup_rv_zen_asm_6x64n_avx512, TRUE,
|
||||
|
||||
BLIS_RRR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, TRUE,
|
||||
BLIS_RCR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, TRUE,
|
||||
BLIS_CRR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8m, TRUE,
|
||||
BLIS_RCC, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, TRUE,
|
||||
BLIS_CCR, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, TRUE,
|
||||
BLIS_CCC, BLIS_SCOMPLEX, bli_cgemmsup_rv_zen_asm_3x8n, TRUE,
|
||||
BLIS_RRR, BLIS_SCOMPLEX, bli_cgemmsup_cv_zen4_asm_24x4m, FALSE,
|
||||
BLIS_RCR, BLIS_SCOMPLEX, bli_cgemmsup_cv_zen4_asm_24x4m, FALSE,
|
||||
BLIS_CRR, BLIS_SCOMPLEX, bli_cgemmsup_cv_zen4_asm_24x4m, FALSE,
|
||||
BLIS_RCC, BLIS_SCOMPLEX, bli_cgemmsup_cv_zen4_asm_24x4m, FALSE,
|
||||
BLIS_CCR, BLIS_SCOMPLEX, bli_cgemmsup_cv_zen4_asm_24x4m, FALSE,
|
||||
BLIS_CCC, BLIS_SCOMPLEX, bli_cgemmsup_cv_zen4_asm_24x4m, FALSE,
|
||||
|
||||
BLIS_RRR, BLIS_DCOMPLEX, bli_zgemmsup_cv_zen4_asm_12x4m, FALSE,
|
||||
BLIS_RRC, BLIS_DCOMPLEX, bli_zgemmsup_cd_zen4_asm_12x4m, FALSE,
|
||||
@@ -338,12 +338,12 @@ void bli_cntx_init_zen4( cntx_t* cntx )
|
||||
// Initialize level-3 sup blocksize objects with architecture-specific
|
||||
// values.
|
||||
// s d c z
|
||||
bli_blksz_init ( &blkszs[ BLIS_MR ], 6, 24, 3, 12,
|
||||
6, 9, 3, 12 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 64, 8, 8, 4 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 192, 144, 72, 48 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 512, 480, 128, 64 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8064, 4080, 2040, 1020 );
|
||||
bli_blksz_init ( &blkszs[ BLIS_MR ], 6, 24, 24, 12,
|
||||
6, 9, 24, 12 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 64, 8, 4, 4 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 192, 144, 120, 48 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 512, 480, 512, 64 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8064, 4080, 4080, 1020 );
|
||||
|
||||
// Update the context with the current architecture's register and cache
|
||||
// blocksizes for small/unpacked level-3 problems.
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2019 - 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -114,7 +114,7 @@ err_t bli_gemmsup
|
||||
|
||||
if(( id == BLIS_ARCH_ZEN5 ) || ( id == BLIS_ARCH_ZEN4 ))
|
||||
{
|
||||
if(( bli_obj_dt(a) == BLIS_DOUBLE ))
|
||||
if(( bli_obj_dt(a) == BLIS_DOUBLE ) || ( bli_obj_dt(a) == BLIS_SCOMPLEX ))
|
||||
{
|
||||
// Pack A to avoid RD kernels.
|
||||
if((stor_id == BLIS_CRC || stor_id == BLIS_RRC))
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2019 - 2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
Copyright (C) 2019 - 2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -198,7 +198,7 @@ BLIS_INLINE void bli_gemmsup_ref_var1n2m_opt_cases
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( ( dt == BLIS_DOUBLE ) || ( dt == BLIS_DCOMPLEX ) )
|
||||
if ( ( dt == BLIS_DOUBLE ) || ( dt == BLIS_DCOMPLEX ) || ( dt == BLIS_SCOMPLEX ) )
|
||||
{
|
||||
// The optimizations are only done for CRC and RRC storage schemes to avoid RD kernels.
|
||||
// Optimizations for other storage schemes is yet to be done.
|
||||
|
||||
@@ -40,6 +40,22 @@
|
||||
|
||||
#ifdef AOCL_DEV
|
||||
|
||||
#define K_bli_cgemmsup_cv_zen4_asm_24x4m 1
|
||||
#define K_bli_cgemmsup_cv_zen4_asm_24x3m 1
|
||||
#define K_bli_cgemmsup_cv_zen4_asm_24x2m 1
|
||||
#define K_bli_cgemmsup_cv_zen4_asm_24x1m 1
|
||||
#define K_bli_cgemmsup_cv_zen4_asm_16x4 1
|
||||
#define K_bli_cgemmsup_cv_zen4_asm_16x3 1
|
||||
#define K_bli_cgemmsup_cv_zen4_asm_16x2 1
|
||||
#define K_bli_cgemmsup_cv_zen4_asm_16x1 1
|
||||
#define K_bli_cgemmsup_cv_zen4_asm_8x4 1
|
||||
#define K_bli_cgemmsup_cv_zen4_asm_8x3 1
|
||||
#define K_bli_cgemmsup_cv_zen4_asm_8x2 1
|
||||
#define K_bli_cgemmsup_cv_zen4_asm_8x1 1
|
||||
#define K_bli_cgemmsup_cv_zen4_asm_fx4 1
|
||||
#define K_bli_cgemmsup_cv_zen4_asm_fx3 1
|
||||
#define K_bli_cgemmsup_cv_zen4_asm_fx2 1
|
||||
#define K_bli_cgemmsup_cv_zen4_asm_fx1 1
|
||||
#define K_bli_cgemm_zen4_asm_24x4 1
|
||||
#define K_bli_cgemm_zen4_asm_4x24 1
|
||||
#define K_bli_zgemmsup_cd_zen4_asm_12x2m 1
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -73,7 +73,7 @@
|
||||
/**********************************************************************/
|
||||
|
||||
template<typename T, typename FT>
|
||||
static void test_complex_gemmsup_ukr( char storage, char trnsa, char trnsb, gtint_t m, gtint_t n, gtint_t k, T alpha, T beta, double thresh, FT ukr_fp, bool is_memory_test = false )
|
||||
static void test_complex_gemmsup_ukr( char storage, char trnsa, char trnsb, gtint_t m, gtint_t n, gtint_t k, gtint_t MR, T alpha, T beta, double thresh, FT ukr_fp, bool is_memory_test = false )
|
||||
{
|
||||
// Compute the leading dimensions of a, b, and c.
|
||||
gtint_t lda = testinghelpers::get_leading_dimension( storage, trnsa, m, k, 0 );
|
||||
@@ -161,7 +161,7 @@ static void test_complex_gemmsup_ukr( char storage, char trnsa, char trnsb, gtin
|
||||
{
|
||||
auxinfo_t data;
|
||||
//Panel stride update is required only for zen4 sup kernels
|
||||
inc_t ps_a_use = (12 * rs_a); //12 = MR
|
||||
inc_t ps_a_use = (MR * rs_a);
|
||||
bli_auxinfo_set_ps_a( ps_a_use, &data );
|
||||
|
||||
ukr_fp(
|
||||
@@ -196,7 +196,7 @@ static void test_complex_gemmsup_ukr( char storage, char trnsa, char trnsb, gtin
|
||||
|
||||
// second call to ukr
|
||||
auxinfo_t data;
|
||||
inc_t ps_a_use = (12 * rs_a); //12 = MR
|
||||
inc_t ps_a_use = (MR * rs_a);
|
||||
bli_auxinfo_set_ps_a( ps_a_use, &data );
|
||||
|
||||
ukr_fp(
|
||||
|
||||
@@ -49,6 +49,7 @@ class zgemmGenericSUP:
|
||||
dcomplex, // beta
|
||||
char, // storage of C matrix
|
||||
zgemmsup_ker_ft, // Function pointer type for zgemm kernel
|
||||
gtint_t, // MR/NR(to set panel stride based on m-var/n-var)
|
||||
char, // transa
|
||||
char, // transb
|
||||
bool // is_memory_test
|
||||
@@ -66,9 +67,10 @@ TEST_P( zgemmGenericSUP, UKR )
|
||||
T beta = std::get<4>(GetParam()); // beta
|
||||
char storageC = std::get<5>(GetParam()); // storage scheme for C matrix
|
||||
zgemmsup_ker_ft kern_ptr = std::get<6>(GetParam()); // pointer to the gemm kernel
|
||||
char transa = std::get<7>(GetParam()); // transa
|
||||
char transb = std::get<8>(GetParam()); // transb
|
||||
bool is_memory_test = std::get<9>(GetParam()); // is_memory_test
|
||||
gtint_t MR = std::get<7>(GetParam()); // ukr dimension MR
|
||||
char transa = std::get<8>(GetParam()); // transa
|
||||
char transb = std::get<9>(GetParam()); // transb
|
||||
bool is_memory_test = std::get<10>(GetParam()); // is_memory_test
|
||||
|
||||
// Set the threshold for the errors:
|
||||
// Check gtestsuite gemm.h or netlib source code for reminder of the
|
||||
@@ -92,14 +94,14 @@ TEST_P( zgemmGenericSUP, UKR )
|
||||
#endif
|
||||
thresh = adj*(3*k+1)*testinghelpers::getEpsilon<T>();
|
||||
}
|
||||
test_complex_gemmsup_ukr(storageC, transa, transb, m, n, k, alpha, beta, thresh, kern_ptr, is_memory_test);
|
||||
test_complex_gemmsup_ukr(storageC, transa, transb, m, n, k, MR, alpha, beta, thresh, kern_ptr, is_memory_test);
|
||||
}// end of function
|
||||
|
||||
class zgemmGenericSUPPrint {
|
||||
public:
|
||||
std::string operator()(
|
||||
testing::TestParamInfo<std::tuple<gtint_t, gtint_t, gtint_t, dcomplex, dcomplex, char,
|
||||
zgemmsup_ker_ft, char, char, bool>> str) const {
|
||||
zgemmsup_ker_ft, gtint_t, char, char, bool>> str) const {
|
||||
|
||||
gtint_t m = std::get<0>(str.param);
|
||||
gtint_t n = std::get<1>(str.param);
|
||||
@@ -107,9 +109,9 @@ public:
|
||||
dcomplex alpha = std::get<3>(str.param);
|
||||
dcomplex beta = std::get<4>(str.param);
|
||||
char storageC = std::get<5>(str.param);
|
||||
char transa = std::get<7>(str.param);
|
||||
char transb = std::get<8>(str.param);
|
||||
bool is_memory_test = std::get<9>(str.param);
|
||||
char transa = std::get<8>(str.param);
|
||||
char transb = std::get<9>(str.param);
|
||||
bool is_memory_test = std::get<10>(str.param);
|
||||
|
||||
std::string str_name;
|
||||
str_name += "_stor_" + std::string(&storageC, 1);
|
||||
@@ -141,6 +143,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -5.0}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_3x4m), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(3)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -161,6 +164,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 5.0}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_2x4), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(2)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -181,6 +185,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 5.4}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_1x4), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(1)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -201,6 +206,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_3x2m), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(3)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -221,6 +227,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 2.3}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_3x2), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(3)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -241,6 +248,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 13}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_2x2), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(2)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -261,6 +269,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 3}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_1x2), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(1)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -281,6 +290,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_3x4m), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(3)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -301,6 +311,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 3.9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_3x2m), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(3)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -321,6 +332,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.4}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_3x2), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(3)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -341,6 +353,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.99}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_2x4), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(2)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -361,6 +374,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0},dcomplex{0.0, 1.9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_1x4), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(1)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -381,6 +395,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -1.3}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_2x2), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(2)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -401,6 +416,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 2.3}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_1x2), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(1)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -421,6 +437,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 2.9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rd_zen_asm_3x4m), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(3)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -441,6 +458,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.19}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rd_zen_asm_3x2m), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(3)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -461,6 +479,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 2.9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rd_zen_asm_3x4n), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(3)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -481,6 +500,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.23}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rd_zen_asm_2x4n), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(2)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -501,6 +521,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 2.9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rd_zen_asm_2x4), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(2)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -521,6 +542,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 21.9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rd_zen_asm_1x4), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(1)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -541,6 +563,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -21.9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rd_zen_asm_1x2), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(1)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -561,6 +584,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -2.3}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rd_zen_asm_2x2), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(2)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -581,6 +605,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -3}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_3x4n), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(3)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -601,6 +626,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -1.9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_2x4n), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(2)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -621,6 +647,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 5.6}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_1x4n), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(1)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('t'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -641,6 +668,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.3}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_3x4n), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(3)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -661,6 +689,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_2x4n), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(2)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -681,6 +710,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -1.3}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_rv_zen_asm_1x4n), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(1)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -705,6 +735,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_12x4m), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(12)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -725,6 +756,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_12x3m), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(12)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -745,6 +777,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -21.9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_12x2m), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(12)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -765,6 +798,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.4}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_12x1m), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(12)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -785,6 +819,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 8}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_8x4), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(8)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -805,6 +840,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -1.8}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_8x3), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(8)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -825,6 +861,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_8x2), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(8)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -845,6 +882,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -2}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_8x1), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(8)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -865,6 +903,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_4x4), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(4)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -885,6 +924,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.5}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_4x3), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(4)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -905,6 +945,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_4x2), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(4)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -925,6 +966,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_4x1), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(4)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -945,6 +987,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.8}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_2x4), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(2)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -965,6 +1008,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_2x3), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(2)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -985,6 +1029,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_2x2), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(2)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -1005,6 +1050,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_2x1), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(2)), // Micro kernel block MR
|
||||
::testing::Values('n'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -1025,6 +1071,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_12x4m), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(12)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -1045,6 +1092,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 1.2}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_12x3m), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(12)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -1065,6 +1113,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 8.9}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_12x2m), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(12)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -1085,6 +1134,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, 19}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('r'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cv_zen4_asm_12x1m), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(12)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -1113,6 +1163,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9.0}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cd_zen4_asm_12x4m), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(12)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -1140,6 +1191,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9.0}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cd_zen4_asm_12x2m), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(12)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -1164,6 +1216,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9.0}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cd_zen4_asm_8x4), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(8)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -1188,6 +1241,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9.0}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cd_zen4_asm_8x2), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(8)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -1212,6 +1266,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9.0}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cd_zen4_asm_4x4), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(4)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -1236,6 +1291,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9.0}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cd_zen4_asm_4x2), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(4)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -1260,6 +1316,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9.0}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cd_zen4_asm_2x4), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(2)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
@@ -1284,6 +1341,7 @@ INSTANTIATE_TEST_SUITE_P(
|
||||
::testing::Values(dcomplex{0.0, 0.0}, dcomplex{1.0, 0.0}, dcomplex{-1.0, 0.0}, dcomplex{-5.0, 0.0}, dcomplex{0.0, -9.0}, dcomplex{-7.3, 6.7}), // beta value
|
||||
::testing::Values('c'), // storage of c
|
||||
::testing::Values(bli_zgemmsup_cd_zen4_asm_2x2), // zgemm_sup kernel
|
||||
::testing::Values(gtint_t(2)), // Micro kernel block MR
|
||||
::testing::Values('t'), // transa
|
||||
::testing::Values('n'), // transb
|
||||
::testing::Values(false, true) // is_memory_test
|
||||
|
||||
7808
kernels/zen4/3/sup/bli_gemmsup_cv_zen4_asm_c24x4m.c
Normal file
7808
kernels/zen4/3/sup/bli_gemmsup_cv_zen4_asm_c24x4m.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -93,6 +93,48 @@ bool bli_cntx_gemmsup_thresh_is_met_zen4( obj_t* a, obj_t* b, obj_t* c, cntx_t*
|
||||
if( ( m <= 216 ) && ( n <= 216 ) && ( k <= 216 ) ) return TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
else if( dt == BLIS_SCOMPLEX )
|
||||
{
|
||||
dim_t k = bli_obj_width_after_trans( a );
|
||||
dim_t m, n;
|
||||
|
||||
const stor3_t stor_id = bli_obj_stor3_from_strides( c, a, b );
|
||||
|
||||
if ( bli_cntx_l3_sup_ker_dislikes_storage_of( c, stor_id, cntx ) )
|
||||
{
|
||||
m = bli_obj_width(c);
|
||||
n = bli_obj_length(c);
|
||||
}
|
||||
else
|
||||
{
|
||||
m = bli_obj_length( c );
|
||||
n = bli_obj_width( c );
|
||||
}
|
||||
|
||||
// Size thresholds on m, n and k: return TRUE as soon as one is met; otherwise fall through to the final return FALSE.
|
||||
if( n <= 540 )
|
||||
{
|
||||
if( n <= 420 ) return TRUE;
|
||||
else if( m <= 1260 ) return TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( m <= 420 )
|
||||
{
|
||||
if( m <= 180 ) return TRUE;
|
||||
else if( n <= 2100 ) return TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( k <= 540 )
|
||||
{
|
||||
if( n <= 1260 ) return TRUE;
|
||||
else if( m <= 900 ) return TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
else
|
||||
return bli_cntx_l3_sup_thresh_is_met( a, b, c, cntx );
|
||||
}
|
||||
|
||||
@@ -332,6 +332,24 @@ GEMMSUP_KER_PROT( double, d, gemmsup_rv_zen4_asm_24x1)
|
||||
GEMMSUP_KER_PROT( double, d, gemmsup_rv_zen4_asm_16x1)
|
||||
GEMMSUP_KER_PROT( double, d, gemmsup_rv_zen4_asm_8x1)
|
||||
|
||||
// CGEMM SUP CV kernels: 24xNm main/m-variant kernels, followed by the 16xN, 8xN and fxN fringe kernels
|
||||
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_24x4m )
|
||||
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_24x3m )
|
||||
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_24x2m )
|
||||
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_24x1m )
|
||||
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_16x4 )
|
||||
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_16x3 )
|
||||
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_16x2 )
|
||||
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_16x1 )
|
||||
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_8x4 )
|
||||
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_8x3 )
|
||||
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_8x2 )
|
||||
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_8x1 )
|
||||
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_fx4 )
|
||||
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_fx3 )
|
||||
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_fx2 )
|
||||
GEMMSUP_KER_PROT( scomplex, c, gemmsup_cv_zen4_asm_fx1 )
|
||||
|
||||
// Zgemm sup CV kernels
|
||||
GEMMSUP_KER_PROT( dcomplex, z, gemmsup_cv_zen4_asm_12x4m )
|
||||
GEMMSUP_KER_PROT( dcomplex, z, gemmsup_cv_zen4_asm_12x3m )
|
||||
|
||||
@@ -93,6 +93,48 @@ bool bli_cntx_gemmsup_thresh_is_met_zen5( obj_t* a, obj_t* b, obj_t* c, cntx_t*
|
||||
if( ( m <= 216 ) && ( n <= 216 ) && ( k <= 216 ) ) return TRUE;
|
||||
return FALSE;
|
||||
}
|
||||
else if( dt == BLIS_SCOMPLEX )
|
||||
{
|
||||
dim_t k = bli_obj_width_after_trans( a );
|
||||
dim_t m, n;
|
||||
|
||||
const stor3_t stor_id = bli_obj_stor3_from_strides( c, a, b );
|
||||
|
||||
if ( bli_cntx_l3_sup_ker_dislikes_storage_of( c, stor_id, cntx ) )
|
||||
{
|
||||
m = bli_obj_width(c);
|
||||
n = bli_obj_length(c);
|
||||
}
|
||||
else
|
||||
{
|
||||
m = bli_obj_length( c );
|
||||
n = bli_obj_width( c );
|
||||
}
|
||||
|
||||
// Size thresholds on m, n and k: return TRUE as soon as one is met; otherwise fall through to the final return FALSE.
|
||||
if( n <= 540 )
|
||||
{
|
||||
if( n <= 420 ) return TRUE;
|
||||
else if( m <= 1260 ) return TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( m <= 420 )
|
||||
{
|
||||
if( m <= 180 ) return TRUE;
|
||||
else if( n <= 2100 ) return TRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
if( k <= 540 )
|
||||
{
|
||||
if( n <= 1260 ) return TRUE;
|
||||
else if( m <= 900 ) return TRUE;
|
||||
}
|
||||
}
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
else
|
||||
return bli_cntx_l3_sup_thresh_is_met( a, b, c, cntx );
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user