mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
AVX-512 based col-preferred kernels for ZGEMM in native path
- Kernel block size is 12x4. - Updated the zen4 config to enable these kernels in the zen4 path. - Tuned MC, KC, NC for better performance for m/n/k sizes > 500. - Updated CMakeLists.txt with the ZGEMM kernels for the Windows build. The kernel supports: 1. Preload and prebroadcast of A and B. 2. Prefetch of the C matrix. 3. The K loop is subdivided into multiple loops to maintain distance between C prefetches. 4. Special case when the imaginary component of alpha/beta is zero. 5. Row/column/general stride of matrix C. AMD-Internal: [CPUPL-2998] Change-Id: I62e3c352d475b1add3f43270805fbcee00e2e440
This commit is contained in:
@@ -345,6 +345,7 @@ if(${TARGET_ARCH} STREQUAL zen4 OR
|
||||
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/kernels/zen4/3/bli_gemmtrsm_u_zen4_8x24.c PROPERTIES COMPILE_FLAGS /arch:AVX512)
|
||||
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/kernels/skx/3/bli_dgemm_skx_asm_16x14.c PROPERTIES COMPILE_FLAGS /arch:AVX512)
|
||||
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/kernels/skx/3/bli_sgemm_skx_asm_32x12_l2.c PROPERTIES COMPILE_FLAGS /arch:AVX512)
|
||||
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/kernels/zen4/3/bli_zgemm_zen4_asm_12x4.c PROPERTIES COMPILE_FLAGS /arch:AVX512)
|
||||
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/kernels/zen4/3/bli_trsm_small_AVX512.c PROPERTIES COMPILE_FLAGS /arch:AVX512)
|
||||
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/kernels/zen4/3/sup/bli_gemmsup_rd_zen_s6x64.h PROPERTIES COMPILE_FLAGS /arch:AVX512)
|
||||
set_source_files_properties(${CMAKE_CURRENT_SOURCE_DIR}/kernels/zen4/3/sup/bli_gemmsup_rd_zen_s6x64.c PROPERTIES COMPILE_FLAGS /arch:AVX512)
|
||||
|
||||
@@ -41,12 +41,12 @@
|
||||
|
||||
#define BLI_CNTX_DEFAULT_BLKSZ_LIST(blkszs) \
|
||||
/* s d c z */ \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MR ], 32, 32, 3, 3 ); \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MR ], 32, 32, 3, 12 ); \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 6, 8, 4 ); \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 512, 128, 144, 18 ); \
|
||||
bli_blksz_init ( &blkszs[ BLIS_KC ], 480, 512, 256, 566, \
|
||||
480, 320, 256, 566 ); \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 6144, 4002, 4080, 256 ); \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 512, 128, 144, 60 ); \
|
||||
bli_blksz_init ( &blkszs[ BLIS_KC ], 480, 512, 256, 512, \
|
||||
480, 320, 256, 160 ); \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 6144, 4002, 4080, 2004 ); \
|
||||
\
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 ); \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); \
|
||||
@@ -70,9 +70,10 @@ void bli_cntx_init_zen4( cntx_t* cntx )
|
||||
BLIS_GEMM_UKR, BLIS_FLOAT , bli_sgemm_skx_asm_32x12_l2, FALSE,
|
||||
BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_zen4_asm_32x6, FALSE,
|
||||
BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_haswell_asm_3x8, TRUE,
|
||||
BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_haswell_asm_3x4, TRUE,
|
||||
/*bli_zgemm_zen4_asm_12x4 is a column preferred kernel*/
|
||||
BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_zen4_asm_12x4, FALSE,
|
||||
|
||||
// Different GEMM kernels are used for TRSM for zen4 architecture
|
||||
// Different GEMM kernels are used for TRSM for zen4 architecture
|
||||
BLIS_GEMM_FOR_TRSM_UKR, BLIS_FLOAT, bli_sgemm_haswell_asm_6x16, TRUE,
|
||||
BLIS_GEMM_FOR_TRSM_UKR, BLIS_DOUBLE, bli_dgemm_zen4_asm_8x24, TRUE,
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ target_sources("${PROJECT_NAME}"
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bli_dgemm_zen4_asm_32x6.c
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bli_dgemm_zen4_asm_8x24.c
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bli_trsm_small_AVX512.c
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bli_zgemm_zen4_asm_12x4.c
|
||||
)
|
||||
|
||||
add_subdirectory(sup)
|
||||
|
||||
1082
kernels/zen4/3/bli_zgemm_zen4_asm_12x4.c
Normal file
1082
kernels/zen4/3/bli_zgemm_zen4_asm_12x4.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -55,6 +55,7 @@ PACKM_KER_PROT( dcomplex, z, packm_zen4_asm_4xk )
|
||||
// native dgemm kernel
|
||||
GEMM_UKR_PROT( double, d, gemm_zen4_asm_32x6 )
|
||||
GEMM_UKR_PROT( double, d, gemm_zen4_asm_8x24 )
|
||||
GEMM_UKR_PROT( dcomplex, z, gemm_zen4_asm_12x4 )
|
||||
|
||||
//sgemm rv sup
|
||||
GEMMSUP_KER_PROT( float, s, gemmsup_rv_zen_asm_6x64m_avx512 )
|
||||
|
||||
Reference in New Issue
Block a user