mirror of
https://github.com/amd/blis.git
synced 2026-05-04 22:41:11 +00:00
aocl_softmax_f32 API for softmax computation as part of lpgemm.
- Softmax is often used as the last activation function in a neural network: softmax(x_i) = exp(x_i) / (exp(x_0) + exp(x_1) + ... + exp(x_n)). This step happens after the final low-precision GEMM computation, so it helps to have softmax functionality that can be invoked as part of the lpgemm workflow. To support this, a new API, aocl_softmax_f32, is introduced as part of aocl_gemm. This API computes the element-wise softmax of a matrix/vector of floats. It invokes ISA-specific micro-kernels (vectorized only when incx=1), and a context-based (cntx) mechanism, similar to lpgemm_cntx, is used to dispatch to the appropriate kernel. AMD-Internal: [CPUPL-3247] Change-Id: If15880360947435985fa87b6436e475571e4684a
This commit is contained in:
committed by
MithunMohan KadavilMadanaMohanan
parent
4aace5f524
commit
3572baa9d3
@@ -67,6 +67,7 @@
|
||||
#define LPGEMM_UTIL_KERN_FUNC_MAP_AVX512_VNNI_BF16 \
|
||||
UMACRO(F32_GELU_TANH, lpgemm_util_f32_gelu_tanh_avx512_kernel) \
|
||||
UMACRO(F32_GELU_ERF, lpgemm_util_f32_gelu_erf_avx512_kernel) \
|
||||
UMACRO(F32_SOFTMAX, lpgemm_util_f32_softmax_avx512_kernel) \
|
||||
|
||||
// Icelake
|
||||
#define LPGEMM_KERN_FUNC_MAP_AVX512_VNNI \
|
||||
@@ -91,6 +92,7 @@
|
||||
#define LPGEMM_UTIL_KERN_FUNC_MAP_AVX512_VNNI \
|
||||
UMACRO(F32_GELU_TANH, lpgemm_util_f32_gelu_tanh_avx512_kernel) \
|
||||
UMACRO(F32_GELU_ERF, lpgemm_util_f32_gelu_erf_avx512_kernel) \
|
||||
UMACRO(F32_SOFTMAX, lpgemm_util_f32_softmax_avx512_kernel) \
|
||||
|
||||
// Skylake
|
||||
#define LPGEMM_KERN_FUNC_MAP_AVX512 \
|
||||
@@ -115,6 +117,7 @@
|
||||
#define LPGEMM_UTIL_KERN_FUNC_MAP_AVX512 \
|
||||
UMACRO(F32_GELU_TANH, lpgemm_util_f32_gelu_tanh_avx512_kernel) \
|
||||
UMACRO(F32_GELU_ERF, lpgemm_util_f32_gelu_erf_avx512_kernel) \
|
||||
UMACRO(F32_SOFTMAX, lpgemm_util_f32_softmax_avx512_kernel) \
|
||||
|
||||
// Milan, Haswell
|
||||
#define LPGEMM_KERN_FUNC_MAP_AVX2 \
|
||||
@@ -139,5 +142,6 @@
|
||||
#define LPGEMM_UTIL_KERN_FUNC_MAP_AVX2 \
|
||||
UMACRO(F32_GELU_TANH, lpgemm_util_f32_gelu_tanh_avx2_kernel) \
|
||||
UMACRO(F32_GELU_ERF, lpgemm_util_f32_gelu_erf_avx2_kernel) \
|
||||
UMACRO(F32_SOFTMAX, lpgemm_util_f32_softmax_avx2_kernel) \
|
||||
|
||||
#endif //LPGEMM_FUNC_MAP_H
|
||||
|
||||
Reference in New Issue
Block a user