mirror of
https://github.com/amd/blis.git
synced 2026-05-05 06:51:11 +00:00
New A packing kernels for F32 API in LPGEMM.
- New packing kernels for the A matrix, based on both the AVX512 and AVX2 ISAs and covering both row-major and column-major storage, are added as part of this change. This removes the dependency on the Haswell A packing kernels. - Tiny GEMM thresholds are further tuned for the BF16 and F32 APIs. AMD-Internal: [SWLCSG-3380, SWLCSG-3415] Change-Id: I7330defacbacc9d07037ce1baf4a441f941e59be
This commit is contained in:
@@ -39,14 +39,20 @@
|
||||
// for different schemas used to pack A fringe cases.
|
||||
BLIS_INLINE void get_packa_strides_mfringe_u8s8s32os32
|
||||
(
|
||||
dim_t* rs,
|
||||
dim_t* cs,
|
||||
const dim_t rs,
|
||||
const dim_t cs,
|
||||
dim_t* rs_use,
|
||||
dim_t* cs_use,
|
||||
dim_t MR,
|
||||
dim_t m_fringe
|
||||
)
|
||||
{
|
||||
( *rs ) = 4;
|
||||
( *cs ) = ( ( *cs ) / MR ) * m_fringe;
|
||||
// Only applicable for row major packing.
|
||||
if ( ( rs != 1 ) && ( cs == 1 ) && ( ( *cs_use ) > MR ))
|
||||
{
|
||||
( *rs_use ) = 4;
|
||||
( *cs_use ) = ( ( *cs_use ) / MR ) * m_fringe;
|
||||
}
|
||||
}
|
||||
|
||||
typedef void (*packa_s32)
|
||||
|
||||
Reference in New Issue
Block a user