Adding support for AOCL_ENABLE_INSTRUCTIONS for f32 LPGEMM API.

- Currently, lpgemm sets the context (block sizes and micro-kernels) based on the ISA of the machine it is executing on. However, this approach does not provide the flexibility to select a different context at runtime. To enable runtime selection of the context, the context initialization is modified to read the AOCL_ENABLE_INSTRUCTIONS environment variable and set the context accordingly. As part of this commit, only f32 context selection is enabled.
- Bug fixes in scale ops in the f32 micro-kernels and in GEMV path selection.
- Added vectorized f32 packing kernels for NR=16 (AVX2) and NR=64 (AVX512). These are only for the B matrix and help remove the dependency of the f32 lpgemm API on the BLIS packing framework.

AMD Internal: [CPUPL-5959]
Change-Id: I4b459aaf33c54423952f89905ba43cf119ce20f6
2026-05-11 01:30:00 +00:00 · 2024-10-28 06:38:57 +00:00
parent 9ce2696fc9
commit 097cda9f9e
18 changed files with 1374 additions and 439 deletions
--- a/addon/aocl_gemm/kernels/f32f32f32/lpgemm_pack_f32.h
+++ b/addon/aocl_gemm/kernels/f32f32f32/lpgemm_pack_f32.h
@@ -31,8 +31,8 @@
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 */
-#ifndef BLIS_GEMM_F32_PACKA
-#define BLIS_GEMM_F32_PACKA
+#ifndef BLIS_GEMM_F32_PACKAB
+#define BLIS_GEMM_F32_PACKAB

 void packa_mr16_f32f32f32of32_col_major
    (
@@ -45,6 +45,43 @@ void packa_mr16_f32f32f32of32_col_major
      dim_t*          rs_p,
      dim_t*          cs_p
    );
-#endif
+
+typedef void (*lpgemm_pack_f32) /* Pointer-to-function type for an f32 pack routine; its parameter types match the packb_nr64/packb_nr16 prototypes declared in this header. */
+     (
+       float*, /* corresponds to pack_b_buffer in the packb_* prototypes */
+       const float*, /* corresponds to b (read-only source) */
+       const dim_t, /* corresponds to rs_b */
+       const dim_t, /* corresponds to cs_b */
+       const dim_t, /* corresponds to NC */
+       const dim_t, /* corresponds to KC */
+       dim_t*, /* corresponds to rs_p (writable) */
+       dim_t* /* corresponds to cs_p (writable) */
+     );
+
+void packb_nr64_f32f32f32of32 /* f32 B-matrix pack routine; per the commit description this is the NR=64 (AVX512) variant. Implementation is not in this header. */
+     (
+       float*       pack_b_buffer, /* writable buffer; presumably receives the packed B data - confirm in implementation */
+       const float* b, /* source matrix, not modified through this pointer (const) */
+       const dim_t  rs_b, /* presumably the row stride of b - TODO confirm */
+       const dim_t  cs_b, /* presumably the column stride of b - TODO confirm */
+       const dim_t  NC, /* presumably the number of columns of b to pack - TODO confirm */
+       const dim_t  KC, /* presumably the number of rows of b to pack - TODO confirm */
+       dim_t*       rs_p, /* writable out-parameter; presumably the row stride of the packed buffer - TODO confirm */
+       dim_t*       cs_p /* writable out-parameter; presumably the column stride of the packed buffer - TODO confirm */
+     );
+
+void packb_nr16_f32f32f32of32 /* f32 B-matrix pack routine; per the commit description this is the NR=16 (AVX2) variant. Implementation is not in this header. */
+     (
+       float*       pack_b_buffer, /* writable buffer; presumably receives the packed B data - confirm in implementation */
+       const float* b, /* source matrix, not modified through this pointer (const) */
+       const dim_t  rs_b, /* presumably the row stride of b - TODO confirm */
+       const dim_t  cs_b, /* presumably the column stride of b - TODO confirm */
+       const dim_t  NC, /* presumably the number of columns of b to pack - TODO confirm */
+       const dim_t  KC, /* presumably the number of rows of b to pack - TODO confirm */
+       dim_t*       rs_p, /* writable out-parameter; presumably the row stride of the packed buffer - TODO confirm */
+       dim_t*       cs_p /* writable out-parameter; presumably the column stride of the packed buffer - TODO confirm */
+     );
+
+#endif //BLIS_GEMM_F32_PACKAB