mirror of
https://github.com/amd/blis.git
synced 2026-05-03 22:11:12 +00:00
Added low precision GEMM - bf16bf16f32of32
Feature Addition: Added a new variant of low precision GEMM to the addon - BFloat16. The kernel takes bf16 type inputs and performs BF16 GEMM operations. The intermediate accumulation and output are in float. 1. Compute kernels will perform computations only if the B matrix is reordered in accordance with the usage of the AVX-512 BF16 instruction - dpbf16_ps. 2. A kernel for packing the B matrix is provided. Change-Id: If5d08213068869eff060c9998596d2d2703a6793
This commit is contained in:
@@ -37,6 +37,7 @@
|
||||
|
||||
#include "lpgemm_types.h"
|
||||
#include "lpgemm_post_ops.h"
|
||||
#include "aocl_bf16_type.h"
|
||||
|
||||
#define LPGEMM_5LOOP(A_type,B_type,C_type,LP_SFX) \
|
||||
void lpgemm_rowvar_ ## LP_SFX \
|
||||
@@ -64,4 +65,5 @@ void lpgemm_rowvar_ ## LP_SFX \
|
||||
LPGEMM_5LOOP(uint8_t,int8_t,int32_t,u8s8s32o32);
|
||||
LPGEMM_5LOOP(uint8_t,int8_t,int16_t,u8s8s16o16);
|
||||
LPGEMM_5LOOP(float,float,float,f32f32f32of32);
|
||||
LPGEMM_5LOOP(bfloat16,bfloat16,float,bf16bf16f32of32);
|
||||
#endif // LPGEMM_5LOOP_INTF_H
|
||||
|
||||
Reference in New Issue
Block a user