mirror of
https://github.com/amd/blis.git
synced 2026-05-04 06:21:12 +00:00
LPGEMM: Added transA support for bf16bf16f32o<bf16|f32> APIs
Details: - Added new parameters (order, trans) to the aocl_get_reorder_buf_size_ and aocl_reorder_ APIs. - Added new pack kernels that pack the A matrix from either a row-major or a column-major input matrix into a pack buffer in row-major format. - Updated cntx with the pack kernel function pointers for packing the A matrix. - Transpose of the A matrix is handled by packing it into row-major format at run time. - Updated the early-return check conditions to account for the trans parameters. - Updated the bench file to test/benchmark transpose support. AMD-Internal: [SWLCSG-2268, SWLCSG-2442] Change-Id: I43a113dc4bc11e6bb7cc4d768c239a16cb6bbea4
This commit is contained in:
@@ -84,7 +84,7 @@
|
||||
#define LPGEMM_PACKA_FUNC_MAP_AVX512_VNNI \
|
||||
PAMACRO(U8S8S16OS16, NULL) \
|
||||
PAMACRO(U8S8S32OS32, packa_k64_u8s8s32o32) \
|
||||
PAMACRO(BF16BF16F32OF32, NULL) \
|
||||
PAMACRO(BF16BF16F32OF32, packa_mr16_bf16bf16f32of32) \
|
||||
PAMACRO(S8S8S32OS32, packa_k64_s8s8s32os32) \
|
||||
PAMACRO(S8S8S16OS16, NULL) \
|
||||
|
||||
@@ -112,7 +112,7 @@
|
||||
#define LPGEMM_PACKA_FUNC_MAP_AVX512 \
|
||||
PAMACRO(U8S8S16OS16, NULL) \
|
||||
PAMACRO(U8S8S32OS32, packa_k64_u8s8s32o32) \
|
||||
PAMACRO(BF16BF16F32OF32, NULL) \
|
||||
PAMACRO(BF16BF16F32OF32, packa_mr16_bf16bf16f32of32) \
|
||||
PAMACRO(S8S8S32OS32, packa_k64_s8s8s32os32) \
|
||||
PAMACRO(S8S8S16OS16, NULL) \
|
||||
|
||||
@@ -140,7 +140,7 @@
|
||||
#define LPGEMM_PACKA_FUNC_MAP_AVX2 \
|
||||
PAMACRO(U8S8S16OS16, NULL) \
|
||||
PAMACRO(U8S8S32OS32, NULL) \
|
||||
PAMACRO(BF16BF16F32OF32, NULL) \
|
||||
PAMACRO(BF16BF16F32OF32, packa_mr16_bf16bf16f32of32) \
|
||||
PAMACRO(S8S8S32OS32, NULL) \
|
||||
PAMACRO(S8S8S16OS16, NULL) \
|
||||
|
||||
|
||||
Reference in New Issue
Block a user