Implemented Multithreading Support and Optimization of DGEMV API (#10)

- Implemented a multithreading framework for the DGEMV API on Zen architectures. Architecture-specific AOCL-dynamic logic determines the optimal number of threads for improved performance.

- The condition check on the value of beta is optimized by using masked operations. The mask value is set based on the value of beta, and the masked operations are applied when the vector y is loaded or scaled by beta.

AMD-Internal: [CPUPL-6746]
This commit is contained in:
S, Hari Govind
2025-06-17 12:39:48 +05:30
committed by GitHub
parent 26e5c63781
commit e097346658
10 changed files with 1370 additions and 1722 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -160,14 +160,14 @@ GEMV_KER_PROT( scomplex, c, gemv_zen_int_4x4 )
GEMV_KER_PROT( dcomplex, z, gemv_zen_int_4x4 )
// gemv (intrinsics)
// Each GEMV_KER_PROT line passes three arguments: a C element type, a
// one-letter type prefix, and a kernel name. The macro is defined outside
// this excerpt (presumably it expands to the kernel's prototype; TODO confirm).
// NOTE(review): the eight double-precision gemv_t names below are listed
// twice, first with an `_avx2` suffix and then without it. This looks like
// both sides of a rename shown together; confirm which set the header
// should keep.
GEMV_KER_PROT( double, d, gemv_t_zen_int_avx2 )
GEMV_KER_PROT( double, d, gemv_t_zen_int_16x7m_avx2 )
GEMV_KER_PROT( double, d, gemv_t_zen_int_16x6m_avx2 )
GEMV_KER_PROT( double, d, gemv_t_zen_int_16x5m_avx2 )
GEMV_KER_PROT( double, d, gemv_t_zen_int_16x4m_avx2 )
GEMV_KER_PROT( double, d, gemv_t_zen_int_16x3m_avx2 )
GEMV_KER_PROT( double, d, gemv_t_zen_int_16x2m_avx2 )
GEMV_KER_PROT( double, d, gemv_t_zen_int_16x1m_avx2 )
// Same eight kernel names as above, without the `_avx2` suffix.
GEMV_KER_PROT( double, d, gemv_t_zen_int )
GEMV_KER_PROT( double, d, gemv_t_zen_int_16x7m )
GEMV_KER_PROT( double, d, gemv_t_zen_int_16x6m )
GEMV_KER_PROT( double, d, gemv_t_zen_int_16x5m )
GEMV_KER_PROT( double, d, gemv_t_zen_int_16x4m )
GEMV_KER_PROT( double, d, gemv_t_zen_int_16x3m )
GEMV_KER_PROT( double, d, gemv_t_zen_int_16x2m )
GEMV_KER_PROT( double, d, gemv_t_zen_int_16x1m )
// her (intrinsics)
HER_KER_PROT( dcomplex, z, her_zen_int_var1 )

File diff suppressed because it is too large Load Diff

View File

@@ -146,14 +146,14 @@ GEMV_KER_PROT( double, d, gemv_n_zen_int_8x1n_avx512 )
GEMV_KER_PROT( double, d, gemv_n_zen_int_m_leftx1n_avx512 )
// dgemv_t kernels for handling op(A) = 't', i.e., transa = 't' cases.
// NOTE(review): the eight dgemv_t names below are listed twice, first as
// `gemv_t_zen_int*_avx512` and then as `gemv_t_zen4_int*`. This looks like
// both sides of a rename shown together; confirm which set the header
// should keep.
GEMV_KER_PROT( double, d, gemv_t_zen_int_avx512 )
GEMV_KER_PROT( double, d, gemv_t_zen_int_32x7m_avx512 )
GEMV_KER_PROT( double, d, gemv_t_zen_int_32x6m_avx512 )
GEMV_KER_PROT( double, d, gemv_t_zen_int_32x5m_avx512 )
GEMV_KER_PROT( double, d, gemv_t_zen_int_32x4m_avx512 )
GEMV_KER_PROT( double, d, gemv_t_zen_int_32x3m_avx512 )
GEMV_KER_PROT( double, d, gemv_t_zen_int_32x2m_avx512 )
GEMV_KER_PROT( double, d, gemv_t_zen_int_32x1m_avx512 )
// Same eight kernels, named with a `zen4` infix in place of the `_avx512`
// suffix.
GEMV_KER_PROT( double, d, gemv_t_zen4_int )
GEMV_KER_PROT( double, d, gemv_t_zen4_int_32x7m )
GEMV_KER_PROT( double, d, gemv_t_zen4_int_32x6m )
GEMV_KER_PROT( double, d, gemv_t_zen4_int_32x5m )
GEMV_KER_PROT( double, d, gemv_t_zen4_int_32x4m )
GEMV_KER_PROT( double, d, gemv_t_zen4_int_32x3m )
GEMV_KER_PROT( double, d, gemv_t_zen4_int_32x2m )
GEMV_KER_PROT( double, d, gemv_t_zen4_int_32x1m )
// Double-precision gemmtrsm micro-kernel entries. The `_l_` and `_u_`
// variants presumably correspond to lower/upper triangular cases (TODO confirm).
GEMMTRSM_UKR_PROT( double, d, gemmtrsm_l_zen_asm_16x14)
GEMMTRSM_UKR_PROT( double, d, gemmtrsm_u_zen_asm_16x14)