mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
AVX512-based SGEMM Optimizations
- Updated the MC, KC and NC cache-blocking sizes to optimal values for the AVX512 native SGEMM kernel.
AMD-Internal: [CPUPL-2385]
Change-Id: I1feae5ac79e960c6b26df24756d460243820b797
This commit is contained in:
committed by
Arnav Sharma
parent
c85bbfdb50
commit
a226e54421
@@ -43,10 +43,10 @@
|
||||
/* s d c z */ \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MR ], 32, 16, 3, 3 ); \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 14, 8, 4 ); \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 480, 240, 144, 18 ); \
|
||||
bli_blksz_init ( &blkszs[ BLIS_KC ], 384, 512, 256, 566, \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 512, 240, 144, 18 ); \
|
||||
bli_blksz_init ( &blkszs[ BLIS_KC ], 480, 512, 256, 566, \
|
||||
480, 320, 256, 566 ); \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 4004, 4080, 256 ); \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 6144, 4004, 4080, 256 ); \
|
||||
\
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, -1, -1 ); \
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, -1, -1 ); \
|
||||
|
||||
Reference in New Issue
Block a user