mirror of
https://github.com/amd/blis.git
synced 2026-05-20 00:18:56 +00:00
Add low-precision POWER10 gemm kernels (#467)
Details:
- This commit adds a new BLIS sandbox that (1) provides implementations based on low-precision gemm kernels, and (2) extends the BLIS typed API for those new implementations. Currently, these new kernels can only be used for the POWER10 microarchitecture; however, they may provide a template for developing similar kernels for other microarchitectures (even those beyond POWER), as changes would likely be limited to select places in the microkernel and possibly the packing routines. The new low-precision operations that are now supported include: shgemm, sbgemm, i16gemm, i8gemm, i4gemm. For more information, refer to the POWER10.md document that is included in 'sandbox/power10'.
This commit is contained in:
@@ -36,16 +36,18 @@
|
||||
// Define template prototypes for level-3 micro-kernels.
|
||||
//
|
||||
|
||||
#define GEMM_UKR_PROT( ctype, ch, opname ) \
|
||||
#define GEMM_UKR_PROT( ctype, ch, opname ) GEMM_UKR_PROT2(ctype, ctype, ch, opname)
|
||||
|
||||
#define GEMM_UKR_PROT2( ctype_in, ctype_out, ch, opname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname) \
|
||||
( \
|
||||
dim_t k, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a, \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype_out* restrict alpha, \
|
||||
ctype_in* restrict a, \
|
||||
ctype_in* restrict b, \
|
||||
ctype_out* restrict beta, \
|
||||
ctype_out* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* restrict data, \
|
||||
cntx_t* restrict cntx \
|
||||
);
|
||||
|
||||
Reference in New Issue
Block a user