mirror of
https://github.com/amd/blis.git
synced 2026-05-05 06:51:11 +00:00
aocl_gelu_<tanh|erf>_f32 APIs for GELU computation as part of lpgemm.
- Currently in aocl_gemm, GELU computation (both tanh- and erf-based) is supported only as a post-op within a low-precision GEMM API call (performed at the micro-kernel level). However, users of aocl_gemm sometimes need GELU computation on its own, without GEMM. - To support this, two new APIs, aocl_gelu_tanh_f32 and aocl_gelu_erf_f32, are introduced as part of aocl_gemm. These APIs compute the element-wise gelu_tanh and gelu_erf, respectively, of a matrix/vector of floats. Both APIs invoke ISA-specific vectorized micro-kernels (vectorized only when incx=1), and a cntx-based mechanism (similar to lpgemm_cntx) is used to dispatch to the appropriate kernel. AMD-Internal: [CPUPL-3218] Change-Id: Ifebbaf5566d7462288a9a67f479104268b0cc704
This commit is contained in:
committed by
MithunMohan KadavilMadanaMohanan
parent
12c97021a1
commit
e23765010d
@@ -64,6 +64,10 @@
|
||||
PBMACRO(BF16BF16F32OF32, packb_nr64_bf16bf16f32of32) \
|
||||
PBMACRO(S8S8S32OS32, packb_nr64_s8s8s32os32) \
|
||||
|
||||
// Utility-kernel table for the AVX512_VNNI_BF16 configuration. Each UMACRO
// row pairs an F32 GELU operation ID (tanh-based, erf-based) with a kernel
// function name; both rows here name the avx512 kernels.
// NOTE(review): UMACRO is not defined in this hunk - what each row expands
// to depends on the includer's definition; confirm there.
// The last row ends with '\', so the line after it is part of the macro.
#define LPGEMM_UTIL_KERN_FUNC_MAP_AVX512_VNNI_BF16 \
|
||||
UMACRO(F32_GELU_TANH, lpgemm_util_f32_gelu_tanh_avx512_kernel) \
|
||||
UMACRO(F32_GELU_ERF, lpgemm_util_f32_gelu_erf_avx512_kernel) \
|
||||
|
||||
// Icelake
|
||||
#define LPGEMM_KERN_FUNC_MAP_AVX512_VNNI \
|
||||
KMACRO(U8S8S16OS16, lpgemm_rowvar_u8s8s16o16_6x32) \
|
||||
@@ -84,6 +88,10 @@
|
||||
PBMACRO(BF16BF16F32OF32, packb_nr64_bf16bf16f32of32) \
|
||||
PBMACRO(S8S8S32OS32, packb_nr64_s8s8s32os32) \
|
||||
|
||||
// Utility-kernel table for the AVX512_VNNI configuration. Each UMACRO row
// pairs an F32 GELU operation ID (tanh-based, erf-based) with a kernel
// function name; both rows here name the avx512 kernels.
// NOTE(review): UMACRO is not defined in this hunk - what each row expands
// to depends on the includer's definition; confirm there.
// The last row ends with '\', so the line after it is part of the macro.
#define LPGEMM_UTIL_KERN_FUNC_MAP_AVX512_VNNI \
|
||||
UMACRO(F32_GELU_TANH, lpgemm_util_f32_gelu_tanh_avx512_kernel) \
|
||||
UMACRO(F32_GELU_ERF, lpgemm_util_f32_gelu_erf_avx512_kernel) \
|
||||
|
||||
// Skylake
|
||||
#define LPGEMM_KERN_FUNC_MAP_AVX512 \
|
||||
KMACRO(U8S8S16OS16, lpgemm_rowvar_u8s8s16o16_6x32) \
|
||||
@@ -104,6 +112,10 @@
|
||||
PBMACRO(BF16BF16F32OF32, packb_nr64_bf16bf16f32of32) \
|
||||
PBMACRO(S8S8S32OS32, packb_nr64_s8s8s32os32) \
|
||||
|
||||
// Utility-kernel table for the AVX512 configuration. Each UMACRO row pairs
// an F32 GELU operation ID (tanh-based, erf-based) with a kernel function
// name; both rows here name the avx512 kernels.
// NOTE(review): UMACRO is not defined in this hunk - what each row expands
// to depends on the includer's definition; confirm there.
// The last row ends with '\', so the line after it is part of the macro.
#define LPGEMM_UTIL_KERN_FUNC_MAP_AVX512 \
|
||||
UMACRO(F32_GELU_TANH, lpgemm_util_f32_gelu_tanh_avx512_kernel) \
|
||||
UMACRO(F32_GELU_ERF, lpgemm_util_f32_gelu_erf_avx512_kernel) \
|
||||
|
||||
// Milan, Haswell
|
||||
#define LPGEMM_KERN_FUNC_MAP_AVX2 \
|
||||
KMACRO(U8S8S16OS16, lpgemm_rowvar_u8s8s16o16_6x32) \
|
||||
@@ -124,4 +136,8 @@
|
||||
PBMACRO(BF16BF16F32OF32, NULL) \
|
||||
PBMACRO(S8S8S32OS32, NULL) \
|
||||
|
||||
// Utility-kernel table for the AVX2 configuration. Each UMACRO row pairs an
// F32 GELU operation ID (tanh-based, erf-based) with a kernel function name;
// unlike the AVX512 tables, both rows here name the avx2 kernels.
// NOTE(review): UMACRO is not defined in this hunk - what each row expands
// to depends on the includer's definition; confirm there.
// The last row ends with '\', so the line after it is part of the macro.
#define LPGEMM_UTIL_KERN_FUNC_MAP_AVX2 \
|
||||
UMACRO(F32_GELU_TANH, lpgemm_util_f32_gelu_tanh_avx2_kernel) \
|
||||
UMACRO(F32_GELU_ERF, lpgemm_util_f32_gelu_erf_avx2_kernel) \
|
||||
|
||||
#endif //LPGEMM_FUNC_MAP_H
|
||||
|
||||
Reference in New Issue
Block a user