Bugfix for A matrix packing in int8(S8/U8) APIs

- Packing the A matrix is not necessary by default for row-major matrix data. Also, it seems packing of A was
 causing regressions and hence wasn't expected to be used.
 - However, packA is necessary in column-major cases, where transpose has to be done. This path has been verified.
 - Hence, when the user sets pack A explicitly, execution enters the incomplete packA function, which overwrites the elements
 in the buffer on subsequent iterations, leading to accuracy issues. As a fix, this patch changes the PACK
 condition to UNPACKED at the interface when the user explicitly sets it, ensuring seamless execution.

[ AMD-Internal : CPUPL - 7193 ]
This commit is contained in:
V, Varsha
2025-08-22 18:46:19 +05:30
committed by GitHub
parent 5044b69d3d
commit 6cdab2720c
10 changed files with 68 additions and 10 deletions

View File

@@ -158,7 +158,14 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,bfloat16,int32_t,s8s8s32obf16)
" not supported.", __FILE__, __LINE__);
goto err_hndl;
}
// A matrix packing is only done in the column-major case, or when the
// A matrix is transposed in row-major. PackA kernels for row-major
// are not supported, hence we set it to unpacked and proceed with GEMM.
if ((is_row_major == TRUE) && (mtag_a == PACK)) {
mtag_a = UNPACKED;
} else if (is_column_major == TRUE && mtag_b == PACK) {
mtag_b = UNPACKED;
}
// From 5-loop function point of view
// B matrix needs to be packed in a certain format in order to be loaded
// and used in bf16 instruction. As such the mtag_b always needs to be either

View File

@@ -151,7 +151,14 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,float,int32_t,s8s8s32of32)
" not supported.", __FILE__, __LINE__);
goto err_hndl;
}
// A matrix packing is only done in the column-major case, or when the
// A matrix is transposed in row-major. PackA kernels for row-major
// are not supported, hence we set it to unpacked and proceed with GEMM.
if ((is_row_major == TRUE) && (mtag_a == PACK)) {
mtag_a = UNPACKED;
} else if (is_column_major == TRUE && mtag_b == PACK) {
mtag_b = UNPACKED;
}
// From 5-loop function point of view
// B matrix needs to be packed in a certain format in order to be loaded
// and used in bf16 instruction. As such the mtag_b always needs to be either

View File

@@ -151,7 +151,14 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32)
"is not supported.", __FILE__, __LINE__);
goto err_hndl;
}
// A matrix packing is only done in the column-major case, or when the
// A matrix is transposed in row-major. PackA kernels for row-major
// are not supported, hence we set it to unpacked and proceed with GEMM.
if ((is_row_major == TRUE) && (mtag_a == PACK)) {
mtag_a = UNPACKED;
} else if (is_column_major == TRUE && mtag_b == PACK) {
mtag_b = UNPACKED;
}
// From 5-loop function point of view
// B matrix needs to be packed in a certain format in order to be loaded
// and used in bf16 instruction. As such the mtag_b always needs to be either

View File

@@ -151,7 +151,14 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8)
" not supported.", __FILE__, __LINE__);
goto err_hndl;
}
// A matrix packing is only done in the column-major case, or when the
// A matrix is transposed in row-major. PackA kernels for row-major
// are not supported, hence we set it to unpacked and proceed with GEMM.
if ((is_row_major == TRUE) && (mtag_a == PACK)) {
mtag_a = UNPACKED;
} else if (is_column_major == TRUE && mtag_b == PACK) {
mtag_b = UNPACKED;
}
// From 5-loop function point of view
// B matrix needs to be packed in a certain format in order to be loaded
// and used in bf16 instruction. As such the mtag_b always needs to be either

View File

@@ -139,7 +139,12 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,uint8_t,int32_t,s8s8s32ou8)
__FILE__, __LINE__);
goto err_hndl;
}
// A matrix packing is done only if the inputs are transposed in a
// row major scenario. A matrix packing in row major is not supported,
// hence mtag_a is changed to UNPACKED before proceeding with the GEMM.
if (mtag_a == PACK) {
mtag_a = UNPACKED;
}
// From 5-loop function point of view
// B matrix needs to be packed in a certain format in order to be loaded
// and used in bf16 instruction. As such the mtag_b always needs to be either

View File

@@ -144,7 +144,12 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,bfloat16,int32_t,u8s8s32obf16)
__FILE__, __LINE__);
goto err_hndl;
}
// A matrix packing is done only if the inputs are transposed in a
// row major scenario. A matrix packing in row major is not supported,
// hence mtag_a is changed to UNPACKED before proceeding with the GEMM.
if (mtag_a == PACK) {
mtag_a = UNPACKED;
}
// From 5-loop function point of view
// B matrix needs to be packed in a certain format in order to be loaded
// and used in bf16 instruction. As such the mtag_b always needs to be either

View File

@@ -139,7 +139,12 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,float,int32_t,u8s8s32of32)
__FILE__, __LINE__);
goto err_hndl;
}
// A matrix packing is done only if the inputs are transposed in a
// row major scenario. A matrix packing in row major is not supported,
// hence mtag_a is changed to UNPACKED before proceeding with the GEMM.
if (mtag_a == PACK) {
mtag_a = UNPACKED;
}
// From 5-loop function point of view
// B matrix needs to be packed in a certain format in order to be loaded
// and used in bf16 instruction. As such the mtag_b always needs to be either

View File

@@ -139,7 +139,12 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32)
__FILE__, __LINE__);
goto err_hndl;
}
// A matrix packing is done only if the inputs are transposed in a
// row major scenario. A matrix packing in row major is not supported,
// hence mtag_a is changed to UNPACKED before proceeding with the GEMM.
if (mtag_a == PACK) {
mtag_a = UNPACKED;
}
// From 5-loop function point of view
// B matrix needs to be packed in a certain format in order to be loaded
// and used in bf16 instruction. As such the mtag_b always needs to be either

View File

@@ -139,7 +139,12 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8)
__FILE__, __LINE__);
goto err_hndl;
}
// A matrix packing is done only if the inputs are transposed in a
// row major scenario. A matrix packing in row major is not supported,
// hence mtag_a is changed to UNPACKED before proceeding with the GEMM.
if (mtag_a == PACK) {
mtag_a = UNPACKED;
}
// From 5-loop function point of view
// B matrix needs to be packed in a certain format in order to be loaded
// and used in bf16 instruction. As such the mtag_b always needs to be either

View File

@@ -139,7 +139,12 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,uint8_t,int32_t,u8s8s32ou8)
__FILE__, __LINE__);
goto err_hndl;
}
// A matrix packing is done only if the inputs are transposed in a
// row major scenario. A matrix packing in row major is not supported,
// hence mtag_a is changed to UNPACKED before proceeding with the GEMM.
if (mtag_a == PACK) {
mtag_a = UNPACKED;
}
// From 5-loop function point of view
// B matrix needs to be packed in a certain format in order to be loaded
// and used in bf16 instruction. As such the mtag_b always needs to be either