mirror of
https://github.com/amd/blis.git
synced 2026-04-20 15:48:50 +00:00
Added AVX512 and AVX2 FP32 RD Kernels
- Added FP32 RD (dot-product) kernels for both AVX512 and AVX2 ISAs.
- The FP32 AVX512 primary RD kernel has blocking of dimensions 6x64
(MRxNR) whereas it is 6x16 (MRxNR) for the AVX2 primary RD kernel.
- Updated the f32 framework to accommodate RD kernels when B is transposed,
subject to thresholds
- Updated data gen python script
TODO:
- Post-Ops not yet supported.
Change-Id: Ibf282741f58a1446321273d5b8044db993f23714
This commit is contained in:
committed by
Nallani Bhaskar
parent
e0b86c69af
commit
c68c258fad
@@ -44,6 +44,21 @@
|
||||
|
||||
LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_6x16m)
|
||||
{
|
||||
//Call RD kernels if B is transposed
|
||||
if(rs_b == 1)
|
||||
{
|
||||
lpgemm_rowvar_f32f32f32of32_6x16m_rd
|
||||
(
|
||||
m0, n0, k0,
|
||||
a, rs_a, cs_a, ps_a,
|
||||
b, rs_b, cs_b,
|
||||
c, rs_c, cs_c,
|
||||
alpha, beta,
|
||||
post_ops_list, post_ops_attr
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
static void* post_ops_labels[] =
|
||||
{
|
||||
&&POST_OPS_6x16F_DISABLE,
|
||||
@@ -60,6 +75,13 @@ LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_6x16m)
|
||||
&&POST_OPS_TANH_6x16F,
|
||||
&&POST_OPS_SIGMOID_6x16F
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
uint64_t n_left = n0 % NR; //n0 is expected to be n0<=NR
|
||||
// First check whether this is an edge case in the n dimension.
|
||||
// If so, dispatch other 6x?m kernels, as needed.
|
||||
|
||||
1543
kernels/zen/lpgemm/f32f32f32/lpgemm_m_rd_fringe_f32_avx2.c
Normal file
1543
kernels/zen/lpgemm/f32f32f32/lpgemm_m_rd_fringe_f32_avx2.c
Normal file
File diff suppressed because it is too large
Load Diff
1288
kernels/zen/lpgemm/f32f32f32/lpgemm_m_rd_kernel_f32_avx2.c
Normal file
1288
kernels/zen/lpgemm/f32f32f32/lpgemm_m_rd_kernel_f32_avx2.c
Normal file
File diff suppressed because it is too large
Load Diff
@@ -44,6 +44,21 @@
|
||||
|
||||
LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_avx512_6x64m)
|
||||
{
|
||||
//Call RD kernels if B is transposed
|
||||
if(rs_b == 1)
|
||||
{
|
||||
lpgemm_rowvar_f32f32f32of32_avx512_6x64m_rd
|
||||
(
|
||||
m0, n0, k0,
|
||||
a, rs_a, cs_a, ps_a,
|
||||
b, rs_b, cs_b,
|
||||
c, rs_c, cs_c,
|
||||
alpha, beta,
|
||||
post_ops_list, post_ops_attr
|
||||
);
|
||||
return;
|
||||
}
|
||||
|
||||
static void* post_ops_labels[] =
|
||||
{
|
||||
&&POST_OPS_6x64F_DISABLE,
|
||||
@@ -60,7 +75,9 @@ LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_avx512_6x64m)
|
||||
&&POST_OPS_TANH_6x64F,
|
||||
&&POST_OPS_SIGMOID_6x64F
|
||||
};
|
||||
|
||||
uint64_t n_left = n0 % 64; //n0 is expected to be n0<=NR
|
||||
|
||||
// First check whether this is an edge case in the n dimension.
|
||||
// If so, dispatch other 12x?m kernels, as needed.
|
||||
if ( n_left )
|
||||
|
||||
4988
kernels/zen4/lpgemm/f32f32f32/lpgemm_m_rd_fringe_f32_avx512.c
Normal file
4988
kernels/zen4/lpgemm/f32f32f32/lpgemm_m_rd_fringe_f32_avx512.c
Normal file
File diff suppressed because it is too large
Load Diff
1931
kernels/zen4/lpgemm/f32f32f32/lpgemm_m_rd_kernel_f32_avx512.c
Normal file
1931
kernels/zen4/lpgemm/f32f32f32/lpgemm_m_rd_kernel_f32_avx512.c
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user