Added AVX512 and AVX2 FP32 RD Kernels

- Added FP32 RD (dot-product) kernels for both the AVX512 and AVX2 ISAs.
- The FP32 AVX512 primary RD kernel has blocking of dimensions 6x64
  (MRxNR) whereas it is 6x16 (MRxNR) for the AVX2 primary RD kernel.
- Updated the f32 framework to accommodate RD kernels when B is transposed,
  subject to thresholds
- Updated the data-generation Python script
TODO:
    - Post-Ops not yet supported.

Change-Id: Ibf282741f58a1446321273d5b8044db993f23714
This commit is contained in:
Arnav Sharma
2025-03-28 12:18:53 +05:30
committed by Nallani Bhaskar
parent e0b86c69af
commit c68c258fad
9 changed files with 9863 additions and 10 deletions

View File

@@ -44,6 +44,21 @@
LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_6x16m)
{
//Call RD kernels if B is transposed
if(rs_b == 1)
{
lpgemm_rowvar_f32f32f32of32_6x16m_rd
(
m0, n0, k0,
a, rs_a, cs_a, ps_a,
b, rs_b, cs_b,
c, rs_c, cs_c,
alpha, beta,
post_ops_list, post_ops_attr
);
return;
}
static void* post_ops_labels[] =
{
&&POST_OPS_6x16F_DISABLE,
@@ -60,6 +75,13 @@ LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_6x16m)
&&POST_OPS_TANH_6x16F,
&&POST_OPS_SIGMOID_6x16F
};
uint64_t n_left = n0 % NR; //n0 is expected to be n0<=NR
// First check whether this is an edge case in the n dimension.
// If so, dispatch other 6x?m kernels, as needed.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -44,6 +44,21 @@
LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_avx512_6x64m)
{
//Call RD kernels if B is transposed
if(rs_b == 1)
{
lpgemm_rowvar_f32f32f32of32_avx512_6x64m_rd
(
m0, n0, k0,
a, rs_a, cs_a, ps_a,
b, rs_b, cs_b,
c, rs_c, cs_c,
alpha, beta,
post_ops_list, post_ops_attr
);
return;
}
static void* post_ops_labels[] =
{
&&POST_OPS_6x64F_DISABLE,
@@ -60,7 +75,9 @@ LPGEMM_MAIN_KERN(float,float,float,f32f32f32of32_avx512_6x64m)
&&POST_OPS_TANH_6x64F,
&&POST_OPS_SIGMOID_6x64F
};
uint64_t n_left = n0 % 64; //n0 is expected to be n0<=NR
// First check whether this is an edge case in the n dimension.
// If so, dispatch other 12x?m kernels, as needed.
if ( n_left )

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff