diff --git a/addon/aocl_gemm/aocl_gemm_bf16bf16f32obf16.c b/addon/aocl_gemm/aocl_gemm_bf16bf16f32obf16.c
index d182f529c..ae9e877f8 100644
--- a/addon/aocl_gemm/aocl_gemm_bf16bf16f32obf16.c
+++ b/addon/aocl_gemm/aocl_gemm_bf16bf16f32obf16.c
@@ -41,9 +41,23 @@
 #include "lpgemm_5loop_interface_apis.h"
 #include "lpgemm_config.h"
 #include "lpgemm_utils.h"
+#include "lpgemm_logger.h"
 
 AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16)
 {
+	LPGEMM_START_LOGGER();
+	LPGEMM_WRITE_LOGGER \
+	(
+	  "bf16bf16f32obf16", \
+	  order, transa, transb, \
+	  m, n, k, \
+	  ( ( float ) alpha ), \
+	  lda, mem_format_a, \
+	  ldb, mem_format_b, \
+	  ( ( float ) beta ), \
+	  ldc, post_op_unparsed \
+	);
+
 	trans_t blis_transa;
 	trans_t blis_transb;
 
@@ -52,7 +66,7 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16)
 	{
 		bli_print_msg(" AVX512_BF16 ISA not supported by processor, "
 				"cannot perform bf16bf16f32 gemm.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	/* Initialize BLIS. */
@@ -62,6 +76,7 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16)
 	aocl_lpgemm_init_global_cntx();
 
 	// check for validity of params.
+	int err_no = 0;
 	AOCL_GEMM_CHECK
 	(
 	  "bf16bf16f32obf16",
@@ -69,8 +84,13 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16)
 	  m, n, k,
 	  a, lda, mem_format_a,
 	  b, ldb, mem_format_b,
-	  c, ldc
+	  c, ldc,
+	  err_no
 	);
+	if ( err_no != 0 )
+	{
+		goto err_hndl;
+	}
 
 #ifdef LPGEMM_BF16_JIT
 	if( jit_kernels_generated == FALSE )
@@ -119,14 +139,14 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16)
 	if( ( is_row_major == TRUE ) && ( mtag_a == REORDERED ) )
 	{
 		bli_print_msg(" Reordering of A matrix is not supported in row major case.", __FILE__, __LINE__ );
-		return;
+		goto err_hndl;
 	}
 	// Inputs swapped in column major, A becomes B from kernel point of view.
 	// Reorder is not supported for column major matrices.
 	else if ( ( is_column_major == TRUE ) && ( ( mtag_b == REORDERED ) || ( mtag_a == REORDERED ) ) )
 	{
 		bli_print_msg(" Reordering of column major matrices is not supported.", __FILE__, __LINE__ );
-		return;
+		goto err_hndl;
 	}
 
 	// From 5-loop function point of view,
@@ -166,7 +186,10 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16)
 	  m, n
 	);
 
-	if( err != BLIS_SUCCESS ) return;
+	if( err != BLIS_SUCCESS )
+	{
+		goto err_hndl;
+	}
 
 	// Initialize a local runtime with global settings if necessary. Note
 	// that in the case that a runtime is passed in, we make a local copy.
@@ -233,4 +256,7 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16)
 		);
 	}
 #endif
+
+err_hndl:;
+	LPGEMM_STOP_LOGGER();
 }
diff --git a/addon/aocl_gemm/aocl_gemm_bf16bf16f32of32.c b/addon/aocl_gemm/aocl_gemm_bf16bf16f32of32.c
index 0930fb5d1..dc591ac15 100644
--- a/addon/aocl_gemm/aocl_gemm_bf16bf16f32of32.c
+++ b/addon/aocl_gemm/aocl_gemm_bf16bf16f32of32.c
@@ -41,9 +41,23 @@
 #include "lpgemm_5loop_interface_apis.h"
 #include "lpgemm_config.h"
 #include "lpgemm_utils.h"
+#include "lpgemm_logger.h"
 
 AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32)
 {
+	LPGEMM_START_LOGGER();
+	LPGEMM_WRITE_LOGGER \
+	(
+	  "bf16bf16f32of32", \
+	  order, transa, transb, \
+	  m, n, k, \
+	  ( ( float ) alpha ), \
+	  lda, mem_format_a, \
+	  ldb, mem_format_b, \
+	  ( ( float ) beta ), \
+	  ldc, post_op_unparsed \
+	);
+
 	trans_t blis_transa;
 	trans_t blis_transb;
 
@@ -52,7 +66,7 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32)
 	{
 		bli_print_msg(" AVX512_BF16 ISA not supported by processor, "
 				"cannot perform bf16bf16f32 gemm.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	/* Initialize BLIS. */
@@ -61,7 +75,8 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32)
 	// Set MC, NC, KC, NR, MR.
 	aocl_lpgemm_init_global_cntx();
 
-// check for validity of params.
+	// check for validity of params.
+	int err_no = 0;
 	AOCL_GEMM_CHECK
 	(
 	  "bf16bf16f32of32",
@@ -69,8 +84,13 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32)
 	  m, n, k,
 	  a, lda, mem_format_a,
 	  b, ldb, mem_format_b,
-	  c, ldc
+	  c, ldc,
+	  err_no
 	);
+	if ( err_no != 0 )
+	{
+		goto err_hndl;
+	}
 
 #ifdef LPGEMM_BF16_JIT
 	if( jit_kernels_generated == FALSE )
@@ -120,14 +140,14 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32)
 	if( ( is_row_major == TRUE ) && ( mtag_a == REORDERED ) )
 	{
 		bli_print_msg(" Reordering of A matrix is not supported in row major case.", __FILE__, __LINE__ );
-		return;
+		goto err_hndl;
 	}
 	// Inputs swapped in column major, A becomes B from kernel point of view.
 	// Reorder is not supported for column major matrices.
 	else if ( ( is_column_major == TRUE ) && ( ( mtag_b == REORDERED ) || ( mtag_a == REORDERED ) ) )
 	{
 		bli_print_msg(" Reordering of column major matrices is not supported.", __FILE__, __LINE__ );
-		return;
+		goto err_hndl;
 	}
 
 	// From 5-loop function point of view
@@ -167,7 +187,10 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32)
 	  m, n
 	);
 
-	if( err != BLIS_SUCCESS ) return;
+	if( err != BLIS_SUCCESS )
+	{
+		goto err_hndl;
+	}
 
 	// Initialize a local runtime with global settings if necessary. Note
 	// that in the case that a runtime is passed in, we make a local copy.
@@ -234,4 +257,7 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32)
 		);
 	}
 #endif
+
+err_hndl:;
+	LPGEMM_STOP_LOGGER();
 }
diff --git a/addon/aocl_gemm/aocl_gemm_bf16s4f32of32.c b/addon/aocl_gemm/aocl_gemm_bf16s4f32of32.c
index a35cb665b..7891fd4cd 100644
--- a/addon/aocl_gemm/aocl_gemm_bf16s4f32of32.c
+++ b/addon/aocl_gemm/aocl_gemm_bf16s4f32of32.c
@@ -4,7 +4,7 @@
    An object-based framework for developing high-performance BLAS-like
    libraries.
 
-   Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved.
+   Copyright (C) 2024 - 2025, Advanced Micro Devices, Inc. All rights reserved.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
@@ -41,9 +41,23 @@
 #include "lpgemm_5loop_interface_apis.h"
 #include "lpgemm_config.h"
 #include "lpgemm_utils.h"
+#include "lpgemm_logger.h"
 
 AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32)
 {
+	LPGEMM_START_LOGGER();
+	LPGEMM_WRITE_LOGGER \
+	(
+	  "bf16s4f32of32", \
+	  order, transa, transb, \
+	  m, n, k, \
+	  ( ( float ) alpha ), \
+	  lda, mem_format_a, \
+	  ldb, mem_format_b, \
+	  ( ( float ) beta ), \
+	  ldc, post_op_unparsed \
+	);
+
     trans_t blis_transa;
     trans_t blis_transb;
 
@@ -53,7 +67,7 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32)
         bli_print_msg(" AVX512_BF16 ISA not supported by processor, "
                       "cannot perform bf16bf16f32 gemm.",
                       __FILE__, __LINE__);
-        return; // Error.
+		goto err_hndl;
     }
 
     /* Initialize BLIS. */
@@ -63,13 +77,18 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32)
     aocl_lpgemm_init_global_cntx();
 
     // check for validity of params.
+	int err_no = 0;
     AOCL_GEMM_CHECK(
         "bf16s4f32of32",
         order, transa, transb,
         m, n, k,
         a, lda, mem_format_a,
         b, ldb, mem_format_b,
-        c, ldc);
+        c, ldc, err_no);
+	if ( err_no != 0 )
+	{
+		goto err_hndl;
+	}
 
     /* Map BLAS chars to their corresponding BLIS enumerated type value. */
     bli_param_map_netlib_to_blis_trans(transa, &blis_transa);
@@ -108,14 +127,14 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32)
     if ((is_row_major == TRUE) && (mtag_a == REORDERED))
     {
         bli_print_msg(" Reordering of A matrix is not supported in row major case.", __FILE__, __LINE__);
-        return;
+		goto err_hndl;
     }
     // Inputs swapped in column major, A becomes B from kernel point of view.
     // Reorder is not supported for column major matrices.
     else if ((is_column_major == TRUE) && ((mtag_b == REORDERED) || (mtag_a == REORDERED)))
     {
         bli_print_msg(" Reordering of column major matrices is not supported.", __FILE__, __LINE__);
-        return;
+		goto err_hndl;
     }
 
     // From 5-loop function point of view
@@ -155,7 +174,9 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32)
                     m, n, k
                 );
     if (err != BLIS_SUCCESS)
-        return;
+	{
+		goto err_hndl;
+	}
 
     // Convert post op struct to post op linked list format.
     lpgemm_post_op post_op_list[AOCL_MAX_POST_OPS];
@@ -167,7 +188,9 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32)
                     m, n
                 );
     if (err != BLIS_SUCCESS)
-        return;
+	{
+		goto err_hndl;
+	}
 
     // Initialize a local runtime with global settings if necessary. Note
     // that in the case that a runtime is passed in, we make a local copy.
@@ -183,7 +206,7 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32)
     {
         // Swapping inputs not possible in case of mixed precision.
         bli_print_msg(" column major not supported yet in bf16s4f32o<f32/bf16>.", __FILE__, __LINE__);
-        return;
+		goto err_hndl;
     }
     else
     {
@@ -204,7 +227,7 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32)
     {
         // Swapping inputs not possible in case of mixed precision.
         bli_print_msg(" column major not supported yet in bf16s4f32o<f32/bf16>.", __FILE__, __LINE__);
-        return;
+		goto err_hndl;
     }
     else
     {
@@ -220,10 +243,26 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32)
         );
     }
 #endif
+
+err_hndl:;
+	LPGEMM_STOP_LOGGER();
 }
 
 AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16)
 {
+	LPGEMM_START_LOGGER();
+	LPGEMM_WRITE_LOGGER \
+	(
+	  "bf16s4f32obf16", \
+	  order, transa, transb, \
+	  m, n, k, \
+	  ( ( float ) alpha ), \
+	  lda, mem_format_a, \
+	  ldb, mem_format_b, \
+	  ( ( float ) beta ), \
+	  ldc, post_op_unparsed \
+	);
+
     trans_t blis_transa;
     trans_t blis_transb;
 
@@ -233,7 +272,7 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16)
         bli_print_msg(" AVX512_BF16 ISA not supported by processor, "
                       "cannot perform bf16bf16f32 gemm.",
                       __FILE__, __LINE__);
-        return; // Error.
+		goto err_hndl;
     }
 
     /* Initialize BLIS. */
@@ -243,13 +282,18 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16)
     aocl_lpgemm_init_global_cntx();
 
     // check for validity of params.
+	int err_no = 0;
     AOCL_GEMM_CHECK(
         "bf16s4f32obf16",
         order, transa, transb,
         m, n, k,
         a, lda, mem_format_a,
         b, ldb, mem_format_b,
-        c, ldc);
+        c, ldc, err_no);
+	if ( err_no != 0 )
+	{
+		goto err_hndl;
+	}
 
     /* Map BLAS chars to their corresponding BLIS enumerated type value. */
     bli_param_map_netlib_to_blis_trans(transa, &blis_transa);
@@ -289,14 +333,14 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16)
     if ((is_row_major == TRUE) && (mtag_a == REORDERED))
     {
         bli_print_msg(" Reordering of A matrix is not supported in row major case.", __FILE__, __LINE__);
-        return;
+		goto err_hndl;
     }
     // Inputs swapped in column major, A becomes B from kernel point of view.
     // Reorder is not supported for column major matrices.
     else if ((is_column_major == TRUE) && ((mtag_b == REORDERED) || (mtag_a == REORDERED)))
     {
         bli_print_msg(" Reordering of column major matrices is not supported.", __FILE__, __LINE__);
-        return;
+		goto err_hndl;
     }
 
     // From 5-loop function point of view
@@ -334,7 +378,9 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16)
         m, n, k);
 
     if (err != BLIS_SUCCESS)
-        return;
+	{
+		goto err_hndl;
+	}
 
     // Convert post op struct to post op linked list format.
     lpgemm_post_op post_op_list[AOCL_MAX_POST_OPS];
@@ -344,7 +390,9 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16)
         m, n);
 
     if (err != BLIS_SUCCESS)
-        return;
+	{
+		goto err_hndl;
+	}
 
     // Initialize a local runtime with global settings if necessary. Note
     // that in the case that a runtime is passed in, we make a local copy.
@@ -360,7 +408,7 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16)
     {
         // Swapping inputs not possible in case of mixed precision.
         bli_print_msg(" column major not supported yet in bf16s4f32o<f32/bf16>.", __FILE__, __LINE__);
-        return;
+		goto err_hndl;
     }
     else
     {
@@ -381,7 +429,7 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16)
     {
         // Swapping inputs not possible in case of mixed precision.
         bli_print_msg(" column major not supported yet in bf16s4f32o<f32/bf16>.", __FILE__, __LINE__);
-        return;
+		goto err_hndl;
     }
     else
     {
@@ -395,4 +443,7 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16)
             post_op_list, BF16);
     }
 #endif
+
+err_hndl:;
+	LPGEMM_STOP_LOGGER();
 }
diff --git a/addon/aocl_gemm/aocl_gemm_check.h b/addon/aocl_gemm/aocl_gemm_check.h
index c7f610f35..60bec8bb4 100644
--- a/addon/aocl_gemm/aocl_gemm_check.h
+++ b/addon/aocl_gemm/aocl_gemm_check.h
@@ -37,7 +37,8 @@
                          m, n, k, \
                          a, lda, mtag_a, \
                          b, ldb, mtag_b, \
-                         c, ldc \
+                         c, ldc, \
+                         err_no \
                        ) \
 { \
     int32_t info = 0; \
@@ -98,7 +99,7 @@
  \
         sprintf( print_msg, "** On entry to %6s, parameter number %2i had an illegal value", op_str, info); \
         bli_print_msg(print_msg, __FILE__, __LINE__); \
-        return; \
+        err_no = info; \
     } \
 }
 
diff --git a/addon/aocl_gemm/aocl_gemm_f32f32f32of32.c b/addon/aocl_gemm/aocl_gemm_f32f32f32of32.c
index e3db6e386..84dd229af 100644
--- a/addon/aocl_gemm/aocl_gemm_f32f32f32of32.c
+++ b/addon/aocl_gemm/aocl_gemm_f32f32f32of32.c
@@ -4,7 +4,7 @@
    An object-based framework for developing high-performance BLAS-like
    libraries.
 
-   Copyright (C) 2022 - 2023, Advanced Micro Devices, Inc. All rights reserved.
+   Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
@@ -41,9 +41,23 @@
 #include "lpgemm_config.h"
 #include "lpgemm_utils.h"
 #include "lpgemm_5loop_interface_apis.h"
+#include "lpgemm_logger.h"
 
 AOCL_GEMM_MATMUL(float,float,float,float,f32f32f32of32)
 {
+	LPGEMM_START_LOGGER();
+	LPGEMM_WRITE_LOGGER \
+	(
+	  "f32f32f32of32", \
+	  order, transa, transb, \
+	  m, n, k, \
+	  ( ( float ) alpha ), \
+	  lda, mem_format_a, \
+	  ldb, mem_format_b, \
+	  ( ( float ) beta ), \
+	  ldc, post_op_unparsed \
+	);
+
 	trans_t blis_transa;
 	trans_t blis_transb;
 
@@ -52,7 +66,7 @@ AOCL_GEMM_MATMUL(float,float,float,float,f32f32f32of32)
 	{
 		bli_print_msg(" AVX2 ISA not supported by processor, "
 				"cannot perform f32f32f32 gemm.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	/* Initialize BLIS. */
@@ -61,11 +75,8 @@ AOCL_GEMM_MATMUL(float,float,float,float,f32f32f32of32)
 	// Initialize lpgemm context.
 	aocl_lpgemm_init_global_cntx();
 
-	AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1);
-	AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(s), transa, transb, m, n, k,\
-	      (void*)&alpha, lda, ldb, (void*)&beta, ldc);
-
 	// check for validity of params.
+	int err_no = 0;
 	AOCL_GEMM_CHECK
 	(
 	  "f32f32f32of32",
@@ -73,8 +84,13 @@ AOCL_GEMM_MATMUL(float,float,float,float,f32f32f32of32)
 	  m, n, k,
 	  a, lda, mem_format_a,
 	  b, ldb, mem_format_b,
-	  c, ldc
+	  c, ldc,
+	  err_no
 	);
+	if ( err_no != 0 )
+	{
+		goto err_hndl;
+	}
 
 	/* Map BLAS chars to their corresponding BLIS enumerated type value. */
 	bli_param_map_netlib_to_blis_trans( transa, &blis_transa );
@@ -113,7 +129,7 @@ AOCL_GEMM_MATMUL(float,float,float,float,f32f32f32of32)
 	if ( ( is_row_major == TRUE ) && ( mtag_a == REORDERED ) )
 	{
 		bli_print_msg(" Reordering of A matrix is not supported.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	// Inputs swapped in column major, A becomes B from kernel point of view.
@@ -121,7 +137,7 @@ AOCL_GEMM_MATMUL(float,float,float,float,f32f32f32of32)
 	{
 		bli_print_msg(" Reordering of column major matrices is not supported.", 
 			__FILE__, __LINE__ );
-		return; //Error
+		goto err_hndl;
 	}
 
 	// By default enable packing for B matrix. Before the 5 loop, based on
@@ -159,7 +175,10 @@ AOCL_GEMM_MATMUL(float,float,float,float,f32f32f32of32)
 	  m, n
 	);
 
-	if( err != BLIS_SUCCESS ) return;
+	if( err != BLIS_SUCCESS )
+	{
+		goto err_hndl;
+	}
 
 	// Initialize a local runtime with global settings if necessary. Note
 	// that in the case that a runtime is passed in, we make a local copy.
@@ -233,5 +252,6 @@ AOCL_GEMM_MATMUL(float,float,float,float,f32f32f32of32)
 	}
 #endif
 
-	AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
+err_hndl:;
+	LPGEMM_STOP_LOGGER();
 }
diff --git a/addon/aocl_gemm/aocl_gemm_s8s8s16os16.c b/addon/aocl_gemm/aocl_gemm_s8s8s16os16.c
index 2f73fcf42..80eae4db0 100644
--- a/addon/aocl_gemm/aocl_gemm_s8s8s16os16.c
+++ b/addon/aocl_gemm/aocl_gemm_s8s8s16os16.c
@@ -4,7 +4,7 @@
    An object-based framework for developing high-performance BLAS-like
    libraries.
 
-   Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. All rights reserved.
+   Copyright (C) 2023 - 2025, Advanced Micro Devices, Inc. All rights reserved.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
@@ -41,9 +41,23 @@
 #include "lpgemm_thread_decor_openmp.h"
 #include "lpgemm_post_ops.h"
 #include "lpgemm_utils_s8.h"
+#include "lpgemm_logger.h"
 
 AOCL_GEMM_MATMUL(int8_t,int8_t,int16_t,int16_t,s8s8s16os16)
 {
+	LPGEMM_START_LOGGER();
+	LPGEMM_WRITE_LOGGER \
+	(
+	  "s8s8s16os16", \
+	  order, transa, transb, \
+	  m, n, k, \
+	  ( ( float ) alpha ), \
+	  lda, mem_format_a, \
+	  ldb, mem_format_b, \
+	  ( ( float ) beta ), \
+	  ldc, post_op_unparsed \
+	);
+
 	trans_t blis_transa;
 	trans_t blis_transb;
 
@@ -52,7 +66,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int16_t,int16_t,s8s8s16os16)
 	{
 		bli_print_msg(" AVX2 ISA not supported by processor, "
 				"cannot perform s8s8s16 gemm.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	/* Initialize BLIS. */
@@ -62,6 +76,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int16_t,int16_t,s8s8s16os16)
 	aocl_lpgemm_init_global_cntx();
 
 	// check for validity of params.
+	int err_no = 0;
 	AOCL_GEMM_CHECK
 	(
 	  "s8s8s16os16",
@@ -69,8 +84,13 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int16_t,int16_t,s8s8s16os16)
 	  m, n, k,
 	  a, lda, mem_format_a,
 	  b, ldb, mem_format_b,
-	  c, ldc
+	  c, ldc,
+	  err_no
 	);
+	if ( err_no != 0 )
+	{
+		goto err_hndl;
+	}
 
 	/* Map BLAS chars to their corresponding BLIS enumerated type value. */
 	bli_param_map_netlib_to_blis_trans(transa, &blis_transa);
@@ -81,13 +101,13 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int16_t,int16_t,s8s8s16os16)
 	if ( ( blis_transb != BLIS_NO_TRANSPOSE ) )
 	{
 		bli_print_msg(" Transpose of B matrices is not supported.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	if ( ( order != 'r' ) && ( order != 'R' ) )
 	{
 		bli_print_msg(" Operation only supports row-major matrices.", __FILE__, __LINE__ );
-		return; // Only row major supported.
+		goto err_hndl;
 	}
 
 	inc_t rs_a = lda;
@@ -126,7 +146,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int16_t,int16_t,s8s8s16os16)
 	if ( !( bli_is_trans( blis_transa ) ) && ( mtag_a != UNPACKED ) )
 	{
 		bli_print_msg(" A matrix needs to be unpacked.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	// Convert post op struct to post op linked list format.
@@ -138,7 +158,10 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int16_t,int16_t,s8s8s16os16)
 	  m, n
 	);
 
-	if( err != BLIS_SUCCESS ) return;
+	if( err != BLIS_SUCCESS )
+	{
+		goto err_hndl;
+	}
 
 	// Initialize a local runtime with global settings if necessary. Note
 	// that in the case that a runtime is passed in, we make a local copy.
@@ -171,4 +194,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int16_t,int16_t,s8s8s16os16)
 	  post_op_list, S16
 	);
 #endif
+
+err_hndl:;
+	LPGEMM_STOP_LOGGER();
 }
diff --git a/addon/aocl_gemm/aocl_gemm_s8s8s16os8.c b/addon/aocl_gemm/aocl_gemm_s8s8s16os8.c
index 19bbfff7b..501adc148 100644
--- a/addon/aocl_gemm/aocl_gemm_s8s8s16os8.c
+++ b/addon/aocl_gemm/aocl_gemm_s8s8s16os8.c
@@ -4,7 +4,7 @@
    An object-based framework for developing high-performance BLAS-like
    libraries.
 
-   Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. All rights reserved.
+   Copyright (C) 2023 - 2025, Advanced Micro Devices, Inc. All rights reserved.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
@@ -41,9 +41,23 @@
 #include "lpgemm_thread_decor_openmp.h"
 #include "lpgemm_post_ops.h"
 #include "lpgemm_utils_s8.h"
+#include "lpgemm_logger.h"
 
 AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int16_t,s8s8s16os8)
 {
+	LPGEMM_START_LOGGER();
+	LPGEMM_WRITE_LOGGER \
+	(
+	  "s8s8s16os8", \
+	  order, transa, transb, \
+	  m, n, k, \
+	  ( ( float ) alpha ), \
+	  lda, mem_format_a, \
+	  ldb, mem_format_b, \
+	  ( ( float ) beta ), \
+	  ldc, post_op_unparsed \
+	);
+
 	trans_t blis_transa;
 	trans_t blis_transb;
 
@@ -52,7 +66,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int16_t,s8s8s16os8)
 	{
 		bli_print_msg(" AVX2 ISA not supported by processor, "
 				"cannot perform s8s8s16 gemm.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	/* Initialize BLIS. */
@@ -62,6 +76,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int16_t,s8s8s16os8)
 	aocl_lpgemm_init_global_cntx();
 
 	// check for validity of params.
+	int err_no = 0;
 	AOCL_GEMM_CHECK
 	(
 	  "s8s8s16os8",
@@ -69,8 +84,13 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int16_t,s8s8s16os8)
 	  m, n, k,
 	  a, lda, mem_format_a,
 	  b, ldb, mem_format_b,
-	  c, ldc
+	  c, ldc,
+	  err_no
 	);
+	if ( err_no != 0 )
+	{
+		goto err_hndl;
+	}
 
 	/* Map BLAS chars to their corresponding BLIS enumerated type value. */
 	bli_param_map_netlib_to_blis_trans(transa, &blis_transa);
@@ -81,13 +101,13 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int16_t,s8s8s16os8)
 	if ( ( blis_transb != BLIS_NO_TRANSPOSE ) )
 	{
 		bli_print_msg(" Transpose of B matrices is not supported.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	if ( ( order != 'r' ) && ( order != 'R' ) )
 	{
 		bli_print_msg(" Operation only supports row-major matrices.", __FILE__, __LINE__ );
-		return; // Only row major supported.
+		goto err_hndl;
 	}
 
 	inc_t rs_a = lda;
@@ -126,7 +146,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int16_t,s8s8s16os8)
 	if ( !( bli_is_trans( blis_transa ) ) && ( mtag_a != UNPACKED ) )
 	{
 		bli_print_msg(" A matrix needs to be unpacked.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	// Convert post op struct to post op linked list format.
@@ -138,7 +158,10 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int16_t,s8s8s16os8)
 	  m, n
 	);
 
-	if( err != BLIS_SUCCESS ) return;
+	if( err != BLIS_SUCCESS )
+	{
+		goto err_hndl;
+	}
 
 	// Initialize a local runtime with global settings if necessary. Note
 	// that in the case that a runtime is passed in, we make a local copy.
@@ -171,4 +194,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int16_t,s8s8s16os8)
 	  post_op_list, S8
 	);
 #endif
+
+err_hndl:;
+	LPGEMM_STOP_LOGGER();
 }
diff --git a/addon/aocl_gemm/aocl_gemm_s8s8s32os32.c b/addon/aocl_gemm/aocl_gemm_s8s8s32os32.c
index 747f9155e..b3d28a1d6 100644
--- a/addon/aocl_gemm/aocl_gemm_s8s8s32os32.c
+++ b/addon/aocl_gemm/aocl_gemm_s8s8s32os32.c
@@ -4,7 +4,7 @@
    An object-based framework for developing high-performance BLAS-like
    libraries.
 
-   Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. All rights reserved.
+   Copyright (C) 2023 - 2025, Advanced Micro Devices, Inc. All rights reserved.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
@@ -41,9 +41,23 @@
 #include "lpgemm_5loop_interface_apis.h"
 #include "lpgemm_config.h"
 #include "lpgemm_utils_s8.h"
+#include "lpgemm_logger.h"
 
 AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32)
 {
+	LPGEMM_START_LOGGER();
+	LPGEMM_WRITE_LOGGER \
+	(
+	  "s8s8s32os32", \
+	  order, transa, transb, \
+	  m, n, k, \
+	  ( ( float ) alpha ), \
+	  lda, mem_format_a, \
+	  ldb, mem_format_b, \
+	  ( ( float ) beta ), \
+	  ldc, post_op_unparsed \
+	);
+
 	trans_t blis_transa;
 	trans_t blis_transb;
 
@@ -52,7 +66,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32)
 	{
 		bli_print_msg(" AVX512_VNNI ISA not supported by processor, "
 				"cannot perform s8s8s32 gemm.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	/* Initialize BLIS. */
@@ -62,6 +76,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32)
 	aocl_lpgemm_init_global_cntx();
 	
 	// check for validity of params.
+	int err_no = 0;
 	AOCL_GEMM_CHECK
 	(
 	  "s8s8s32os32",
@@ -69,8 +84,13 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32)
 	  m, n, k,
 	  a, lda, mem_format_a,
 	  b, ldb, mem_format_b,
-	  c, ldc
+	  c, ldc,
+	  err_no
 	);
+	if ( err_no != 0 )
+	{
+		goto err_hndl;
+	}
 
 	/* Map BLAS chars to their corresponding BLIS enumerated type value. */
 	bli_param_map_netlib_to_blis_trans( transa, &blis_transa );
@@ -85,7 +105,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32)
 	{
 		bli_print_msg("Column major inputs not supported with Post-ops.",
 					  __FILE__, __LINE__);
-		return;
+		goto err_hndl;
 	}
 	
 	inc_t rs_a = lda;
@@ -120,7 +140,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32)
 	{
 		bli_print_msg(" Reordering of A matrix is not supported " 
 						"in row major case.", __FILE__, __LINE__);
-		return;
+		goto err_hndl;
 	}
 	// Inputs swapped in column major, A becomes B from kernel point of view.
 	// Reorder is not supported for column major matrices.
@@ -129,7 +149,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32)
 	{
 		bli_print_msg(" Reordering of column major matrices " 
 						"is not supported.", __FILE__, __LINE__);
-		return;
+		goto err_hndl;
 	}
 
 	// From 5-loop function point of view
@@ -169,7 +189,10 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32)
 	  m, n
 	);
 
-	if( err != BLIS_SUCCESS ) return;
+	if( err != BLIS_SUCCESS )
+	{
+		goto err_hndl;
+	}
 
 	// Initialize a local runtime with global settings if necessary. Note
 	// that in the case that a runtime is passed in, we make a local copy.
@@ -228,4 +251,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32)
 			post_op_list, S32);
 	}
 #endif
+
+err_hndl:;
+	LPGEMM_STOP_LOGGER();
 }
diff --git a/addon/aocl_gemm/aocl_gemm_s8s8s32os8.c b/addon/aocl_gemm/aocl_gemm_s8s8s32os8.c
index ffeef5ba1..1c092ed08 100644
--- a/addon/aocl_gemm/aocl_gemm_s8s8s32os8.c
+++ b/addon/aocl_gemm/aocl_gemm_s8s8s32os8.c
@@ -4,7 +4,7 @@
    An object-based framework for developing high-performance BLAS-like
    libraries.
 
-   Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. All rights reserved.
+   Copyright (C) 2023 - 2025, Advanced Micro Devices, Inc. All rights reserved.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
@@ -41,9 +41,23 @@
 #include "lpgemm_5loop_interface_apis.h"
 #include "lpgemm_config.h"
 #include "lpgemm_utils_s8.h"
+#include "lpgemm_logger.h"
 
 AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8)
 {
+	LPGEMM_START_LOGGER();
+	LPGEMM_WRITE_LOGGER \
+	(
+	  "s8s8s32os8", \
+	  order, transa, transb, \
+	  m, n, k, \
+	  ( ( float ) alpha ), \
+	  lda, mem_format_a, \
+	  ldb, mem_format_b, \
+	  ( ( float ) beta ), \
+	  ldc, post_op_unparsed \
+	);
+
 	trans_t blis_transa;
 	trans_t blis_transb;
 
@@ -52,7 +66,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8)
 	{
 		bli_print_msg(" AVX512_VNNI ISA not supported by processor, "
 				"cannot perform s8s8s32 gemm.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	/* Initialize BLIS. */
@@ -62,6 +76,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8)
 	aocl_lpgemm_init_global_cntx();
 
 	// check for validity of params.
+	int err_no = 0;
 	AOCL_GEMM_CHECK
 	(
 	  "s8s8s32os8",
@@ -69,8 +84,13 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8)
 	  m, n, k,
 	  a, lda, mem_format_a,
 	  b, ldb, mem_format_b,
-	  c, ldc
+	  c, ldc,
+	  err_no
 	);
+	if ( err_no != 0 )
+	{
+		goto err_hndl;
+	}
 
 	/* Map BLAS chars to their corresponding BLIS enumerated type value. */
 	bli_param_map_netlib_to_blis_trans( transa, &blis_transa );
@@ -85,7 +105,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8)
 	{
 		bli_print_msg("Column major inputs not supported with Post-ops.",
 					  __FILE__, __LINE__);
-		return;
+		goto err_hndl;
 	}
 	
 	// The strides are set assuming a row major kernel.
@@ -120,7 +140,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8)
 	{
 		bli_print_msg(" Reordering of A matrix is not supported in " 
 						" row major case.", __FILE__, __LINE__);
-		return;
+		goto err_hndl;
 	}
 	// Inputs swapped in column major, A becomes B from kernel point of view.
 	// Reorder is not supported for column major matrices.
@@ -129,7 +149,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8)
 	{
 		bli_print_msg(" Reordering of column major matrices is " 
 						" not supported.", __FILE__, __LINE__);
-		return;
+		goto err_hndl;
 	}
 
 	// From 5-loop function point of view
@@ -169,7 +189,10 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8)
 	  m, n
 	);
 
-	if( err != BLIS_SUCCESS ) return;
+	if( err != BLIS_SUCCESS )
+	{
+		goto err_hndl;
+	}
 
 	// Initialize a local runtime with global settings if necessary. Note
 	// that in the case that a runtime is passed in, we make a local copy.
@@ -235,4 +258,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8)
 		);
 	}
 #endif
+
+err_hndl:;
+	LPGEMM_STOP_LOGGER();
 }
diff --git a/addon/aocl_gemm/aocl_gemm_u8s8s16os16.c b/addon/aocl_gemm/aocl_gemm_u8s8s16os16.c
index d6b179f29..867080522 100644
--- a/addon/aocl_gemm/aocl_gemm_u8s8s16os16.c
+++ b/addon/aocl_gemm/aocl_gemm_u8s8s16os16.c
@@ -4,7 +4,7 @@
    An object-based framework for developing high-performance BLAS-like
    libraries.
 
-   Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. All rights reserved.
+   Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
@@ -41,9 +41,23 @@
 #include "lpgemm_utils.h"
 #include "lpgemm_thread_decor_openmp.h"
 #include "lpgemm_post_ops.h"
+#include "lpgemm_logger.h"
 
 AOCL_GEMM_MATMUL(uint8_t,int8_t,int16_t,int16_t,u8s8s16os16)
 {
+	LPGEMM_START_LOGGER();
+	LPGEMM_WRITE_LOGGER \
+	(
+	  "u8s8s16os16", \
+	  order, transa, transb, \
+	  m, n, k, \
+	  ( ( float ) alpha ), \
+	  lda, mem_format_a, \
+	  ldb, mem_format_b, \
+	  ( ( float ) beta ), \
+	  ldc, post_op_unparsed \
+	);
+
 	trans_t blis_transa;
 	trans_t blis_transb;
 
@@ -52,7 +66,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int16_t,int16_t,u8s8s16os16)
 	{
 		bli_print_msg(" AVX2 ISA not supported by processor, "
 				"cannot perform u8s8s16 gemm.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	/* Initialize BLIS. */
@@ -62,6 +76,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int16_t,int16_t,u8s8s16os16)
 	aocl_lpgemm_init_global_cntx();
 
 	// check for validity of params.
+	int err_no = 0;
 	AOCL_GEMM_CHECK
 	(
 	  "u8s8s16os16",
@@ -69,8 +84,13 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int16_t,int16_t,u8s8s16os16)
 	  m, n, k,
 	  a, lda, mem_format_a,
 	  b, ldb, mem_format_b,
-	  c, ldc
+	  c, ldc,
+	  err_no
 	);
+	if ( err_no != 0 )
+	{
+		goto err_hndl;
+	}
 
 	/* Map BLAS chars to their corresponding BLIS enumerated type value. */
 	bli_param_map_netlib_to_blis_trans(transa, &blis_transa);
@@ -81,13 +101,13 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int16_t,int16_t,u8s8s16os16)
 	if ( ( blis_transb != BLIS_NO_TRANSPOSE ) )
 	{
 		bli_print_msg(" Transpose of B matrices is not supported.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	if ( ( order != 'r' ) && ( order != 'R' ) )
 	{
 		bli_print_msg(" Operation only supports row-major matrices.", __FILE__, __LINE__ );
-		return; // Only row major supported.
+		goto err_hndl;
 	}
 
 	inc_t rs_a = lda;
@@ -126,7 +146,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int16_t,int16_t,u8s8s16os16)
 	if ( !( bli_is_trans( blis_transa ) ) && ( mtag_a != UNPACKED ) )
 	{
 		bli_print_msg(" A matrix needs to be unpacked.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	// Convert post op struct to post op linked list format.
@@ -138,7 +158,10 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int16_t,int16_t,u8s8s16os16)
 	  m, n
 	);
 
-	if( err != BLIS_SUCCESS ) return;
+	if( err != BLIS_SUCCESS )
+	{
+		goto err_hndl;
+	}
 
 	// Initialize a local runtime with global settings if necessary. Note
 	// that in the case that a runtime is passed in, we make a local copy.
@@ -171,4 +194,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int16_t,int16_t,u8s8s16os16)
 	  post_op_list, S16
 	);
 #endif
+
+err_hndl:;
+	LPGEMM_STOP_LOGGER();
 }
diff --git a/addon/aocl_gemm/aocl_gemm_u8s8s16os8.c b/addon/aocl_gemm/aocl_gemm_u8s8s16os8.c
index 3c10c7530..38ff439c6 100644
--- a/addon/aocl_gemm/aocl_gemm_u8s8s16os8.c
+++ b/addon/aocl_gemm/aocl_gemm_u8s8s16os8.c
@@ -4,7 +4,7 @@
    An object-based framework for developing high-performance BLAS-like
    libraries.
 
-   Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. All rights reserved.
+   Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
@@ -41,9 +41,22 @@
 #include "lpgemm_utils.h"
 #include "lpgemm_thread_decor_openmp.h"
 #include "lpgemm_post_ops.h"
+#include "lpgemm_logger.h"
 
 AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int16_t,u8s8s16os8)
 {
+	LPGEMM_START_LOGGER();
+	LPGEMM_WRITE_LOGGER \
+	(
+	  "u8s8s16os8", \
+	  order, transa, transb, \
+	  m, n, k, \
+	  ( ( float ) alpha ), \
+	  lda, mem_format_a, \
+	  ldb, mem_format_b, \
+	  ( ( float ) beta ), \
+	  ldc, post_op_unparsed \
+	);
 	trans_t blis_transa;
 	trans_t blis_transb;
 
@@ -52,7 +65,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int16_t,u8s8s16os8)
 	{
 		bli_print_msg(" AVX2 ISA not supported by processor, "
 				"cannot perform u8s8s16 gemm.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	/* Initialize BLIS. */
@@ -62,6 +75,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int16_t,u8s8s16os8)
 	aocl_lpgemm_init_global_cntx();
 
 	// check for validity of params.
+	int err_no = 0;
 	AOCL_GEMM_CHECK
 	(
 	  "u8s8s16os8",
@@ -69,8 +83,13 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int16_t,u8s8s16os8)
 	  m, n, k,
 	  a, lda, mem_format_a,
 	  b, ldb, mem_format_b,
-	  c, ldc
+	  c, ldc,
+	  err_no
 	);
+	if ( err_no != 0 )
+	{
+		goto err_hndl;
+	}
 
 	/* Map BLAS chars to their corresponding BLIS enumerated type value. */
 	bli_param_map_netlib_to_blis_trans(transa, &blis_transa);
@@ -81,13 +100,13 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int16_t,u8s8s16os8)
 	if ( ( blis_transb != BLIS_NO_TRANSPOSE ) )
 	{
 		bli_print_msg(" Transpose of B matrices is not supported.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	if ( ( order != 'r' ) && ( order != 'R' ) )
 	{
 		bli_print_msg(" Operation only supports row-major matrices.", __FILE__, __LINE__ );
-		return; // Only row major supported.
+		goto err_hndl;
 	}
 
 	inc_t rs_a = lda;
@@ -126,7 +145,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int16_t,u8s8s16os8)
 	if ( !( bli_is_trans( blis_transa ) ) && ( mtag_a != UNPACKED ) )
 	{
 		bli_print_msg(" A matrix needs to be unpacked.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	// Convert post op struct to post op linked list format.
@@ -138,7 +157,10 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int16_t,u8s8s16os8)
 	  m, n
 	);
 
-	if( err != BLIS_SUCCESS ) return;
+	if( err != BLIS_SUCCESS )
+	{
+		goto err_hndl;
+	}
 
 	// Initialize a local runtime with global settings if necessary. Note
 	// that in the case that a runtime is passed in, we make a local copy.
@@ -171,4 +193,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int16_t,u8s8s16os8)
 	  post_op_list, S8
 	);
 #endif
+
+err_hndl:;
+	LPGEMM_STOP_LOGGER();
 }
diff --git a/addon/aocl_gemm/aocl_gemm_u8s8s16ou8.c b/addon/aocl_gemm/aocl_gemm_u8s8s16ou8.c
index f29028d57..c6a6e93b5 100644
--- a/addon/aocl_gemm/aocl_gemm_u8s8s16ou8.c
+++ b/addon/aocl_gemm/aocl_gemm_u8s8s16ou8.c
@@ -4,7 +4,7 @@
    An object-based framework for developing high-performance BLAS-like
    libraries.
 
-   Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. All rights reserved.
+   Copyright (C) 2023 - 2025, Advanced Micro Devices, Inc. All rights reserved.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
@@ -41,9 +41,22 @@
 #include "lpgemm_utils.h"
 #include "lpgemm_thread_decor_openmp.h"
 #include "lpgemm_post_ops.h"
+#include "lpgemm_logger.h"
 
 AOCL_GEMM_MATMUL(uint8_t,int8_t,uint8_t,int16_t,u8s8s16ou8)
 {
+	LPGEMM_START_LOGGER();
+	LPGEMM_WRITE_LOGGER \
+	(
+	  "u8s8s16ou8", \
+	  order, transa, transb, \
+	  m, n, k, \
+	  ( ( float ) alpha ), \
+	  lda, mem_format_a, \
+	  ldb, mem_format_b, \
+	  ( ( float ) beta ), \
+	  ldc, post_op_unparsed \
+	);
 	trans_t blis_transa;
 	trans_t blis_transb;
 
@@ -52,7 +65,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,uint8_t,int16_t,u8s8s16ou8)
 	{
 		bli_print_msg(" AVX2 ISA not supported by processor, "
 				"cannot perform u8s8s16 gemm.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	/* Initialize BLIS. */
@@ -62,6 +75,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,uint8_t,int16_t,u8s8s16ou8)
 	aocl_lpgemm_init_global_cntx();
 
 	// check for validity of params.
+	int err_no = 0;
 	AOCL_GEMM_CHECK
 	(
 	  "u8s8s16ou8",
@@ -69,8 +83,13 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,uint8_t,int16_t,u8s8s16ou8)
 	  m, n, k,
 	  a, lda, mem_format_a,
 	  b, ldb, mem_format_b,
-	  c, ldc
+	  c, ldc,
+	  err_no
 	);
+	if ( err_no != 0 )
+	{
+		goto err_hndl;
+	}
 
 	/* Map BLAS chars to their corresponding BLIS enumerated type value. */
 	bli_param_map_netlib_to_blis_trans(transa, &blis_transa);
@@ -81,13 +100,13 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,uint8_t,int16_t,u8s8s16ou8)
 	if ( ( blis_transb != BLIS_NO_TRANSPOSE ) )
 	{
 		bli_print_msg(" Transpose of B matrices is not supported.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	if ( ( order != 'r' ) && ( order != 'R' ) )
 	{
 		bli_print_msg(" Operation only supports row-major matrices.", __FILE__, __LINE__ );
-		return; // Only row major supported.
+		goto err_hndl;
 	}
 
 	inc_t rs_a = lda;
@@ -126,7 +145,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,uint8_t,int16_t,u8s8s16ou8)
 	if ( !( bli_is_trans( blis_transa ) ) && ( mtag_a != UNPACKED ) )
 	{
 		bli_print_msg(" A matrix needs to be unpacked.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	// Convert post op struct to post op linked list format.
@@ -138,7 +157,10 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,uint8_t,int16_t,u8s8s16ou8)
 	  m, n
 	);
 
-	if( err != BLIS_SUCCESS ) return;
+	if( err != BLIS_SUCCESS )
+	{
+		goto err_hndl;
+	}
 
 	// Initialize a local runtime with global settings if necessary. Note
 	// that in the case that a runtime is passed in, we make a local copy.
@@ -171,4 +193,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,uint8_t,int16_t,u8s8s16ou8)
 	  post_op_list, U8
 	);
 #endif
+
+err_hndl:;
+	LPGEMM_STOP_LOGGER();
 }
diff --git a/addon/aocl_gemm/aocl_gemm_u8s8s32os32.c b/addon/aocl_gemm/aocl_gemm_u8s8s32os32.c
index 56c1b06db..5902ef445 100644
--- a/addon/aocl_gemm/aocl_gemm_u8s8s32os32.c
+++ b/addon/aocl_gemm/aocl_gemm_u8s8s32os32.c
@@ -4,7 +4,7 @@
    An object-based framework for developing high-performance BLAS-like
    libraries.
 
-   Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. All rights reserved.
+   Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
@@ -41,9 +41,22 @@
 #include "lpgemm_5loop_interface_apis.h"
 #include "lpgemm_config.h"
 #include "lpgemm_utils.h"
+#include "lpgemm_logger.h"
 
 AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32)
 {
+	LPGEMM_START_LOGGER();
+	LPGEMM_WRITE_LOGGER \
+	(
+	  "u8s8s32os32", \
+	  order, transa, transb, \
+	  m, n, k, \
+	  ( ( float ) alpha ), \
+	  lda, mem_format_a, \
+	  ldb, mem_format_b, \
+	  ( ( float ) beta ), \
+	  ldc, post_op_unparsed \
+	);
 	trans_t blis_transa;
 	trans_t blis_transb;
 
@@ -52,7 +65,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32)
 	{
 		bli_print_msg(" AVX512_VNNI ISA not supported by processor, "
 				"cannot perform u8s8s32 gemm.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	/* Initialize BLIS. */
@@ -62,6 +75,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32)
 	aocl_lpgemm_init_global_cntx();
 
 	// check for validity of params.
+	int err_no = 0;
 	AOCL_GEMM_CHECK
 	(
 	  "u8s8s32os32",
@@ -69,8 +83,13 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32)
 	  m, n, k,
 	  a, lda, mem_format_a,
 	  b, ldb, mem_format_b,
-	  c, ldc
+	  c, ldc,
+	  err_no
 	);
+	if ( err_no != 0 )
+	{
+		goto err_hndl;
+	}
 
 	/* Map BLAS chars to their corresponding BLIS enumerated type value. */
 	bli_param_map_netlib_to_blis_trans( transa, &blis_transa );
@@ -85,7 +104,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32)
 	{
 		bli_print_msg("Column major inputs not supported with Post-ops.",
 					  __FILE__, __LINE__);
-		return;
+		goto err_hndl;
 	}
 	
 	inc_t rs_a = lda;
@@ -121,7 +140,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32)
 		bli_print_msg(" Reordering of A matrix is not supported "
 					  "in row major case.",
 					  __FILE__, __LINE__);
-		return;
+		goto err_hndl;
 	}
 	// Inputs swapped in column major, A becomes B from kernel point of view.
 	// Reorder is not supported for column major matrices.
@@ -131,7 +150,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32)
 		bli_print_msg(" Reordering of column major matrices "
 					  "is not supported.",
 					  __FILE__, __LINE__);
-		return;
+		goto err_hndl;
 	}
 
 	// From 5-loop function point of view
@@ -171,7 +190,10 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32)
 	  m, n
 	);
 
-	if( err != BLIS_SUCCESS ) return;
+	if( err != BLIS_SUCCESS )
+	{
+		goto err_hndl;
+	}
 
 	// Initialize a local runtime with global settings if necessary. Note
 	// that in the case that a runtime is passed in, we make a local copy.
@@ -230,4 +252,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32)
 			post_op_list, S32);
 	}
 #endif
+
+err_hndl:;
+	LPGEMM_STOP_LOGGER();
 }
diff --git a/addon/aocl_gemm/aocl_gemm_u8s8s32os8.c b/addon/aocl_gemm/aocl_gemm_u8s8s32os8.c
index 13184b593..8756713f1 100644
--- a/addon/aocl_gemm/aocl_gemm_u8s8s32os8.c
+++ b/addon/aocl_gemm/aocl_gemm_u8s8s32os8.c
@@ -4,7 +4,7 @@
    An object-based framework for developing high-performance BLAS-like
    libraries.
 
-   Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. All rights reserved.
+   Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved.
 
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are
@@ -41,9 +41,22 @@
 #include "lpgemm_5loop_interface_apis.h"
 #include "lpgemm_config.h"
 #include "lpgemm_utils.h"
+#include "lpgemm_logger.h"
 
 AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8)
 {
+	LPGEMM_START_LOGGER();
+	LPGEMM_WRITE_LOGGER \
+	(
+	  "u8s8s32os8", \
+	  order, transa, transb, \
+	  m, n, k, \
+	  ( ( float ) alpha ), \
+	  lda, mem_format_a, \
+	  ldb, mem_format_b, \
+	  ( ( float ) beta ), \
+	  ldc, post_op_unparsed \
+	);
 	trans_t blis_transa;
 	trans_t blis_transb;
 
@@ -52,7 +65,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8)
 	{
 		bli_print_msg(" AVX512_VNNI ISA not supported by processor, "
 				"cannot perform u8s8s32 gemm.", __FILE__, __LINE__ );
-		return; // Error.
+		goto err_hndl;
 	}
 
 	/* Initialize BLIS. */
@@ -62,6 +75,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8)
 	aocl_lpgemm_init_global_cntx();
 
 	// check for validity of params.
+	int err_no = 0;
 	AOCL_GEMM_CHECK
 	(
 	  "u8s8s32os8",
@@ -69,8 +83,13 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8)
 	  m, n, k,
 	  a, lda, mem_format_a,
 	  b, ldb, mem_format_b,
-	  c, ldc
+	  c, ldc,
+	  err_no
 	);
+	if ( err_no != 0 )
+	{
+		goto err_hndl;
+	}
 
 	/* Map BLAS chars to their corresponding BLIS enumerated type value. */
 	bli_param_map_netlib_to_blis_trans(transa, &blis_transa);
@@ -85,7 +104,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8)
 	{
 		bli_print_msg("Column major inputs not supported with Post-ops.",
 					  __FILE__, __LINE__);
-		return;
+		goto err_hndl;
 	}
 	
 	inc_t rs_a = lda;
@@ -121,7 +140,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8)
 		bli_print_msg(" Reordering of A matrix is not supported "
 					  "in row major case.",
 					  __FILE__, __LINE__);
-		return;
+		goto err_hndl;
 	}
 	// Inputs swapped in column major, A becomes B from kernel point of view.
 	// Reorder is not supported for column major matrices.
@@ -131,7 +150,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8)
 		bli_print_msg(" Reordering of column major matrices "
 					  "is not supported.",
 					  __FILE__, __LINE__);
-		return;
+		goto err_hndl;
 	}
 
 	// From 5-loop function point of view
@@ -171,7 +190,10 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8)
 	  m, n
 	);
 
-	if( err != BLIS_SUCCESS ) return;
+	if( err != BLIS_SUCCESS )
+	{
+		goto err_hndl;
+	}
 
 	// Initialize a local runtime with global settings if necessary. Note
 	// that in the case that a runtime is passed in, we make a local copy.
@@ -231,4 +253,6 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8)
 	}
 #endif
 
+err_hndl:;
+	LPGEMM_STOP_LOGGER();
 }
diff --git a/addon/aocl_gemm/config/lpgemm_config.c b/addon/aocl_gemm/config/lpgemm_config.c
index ef6a3c97b..d744d7a62 100644
--- a/addon/aocl_gemm/config/lpgemm_config.c
+++ b/addon/aocl_gemm/config/lpgemm_config.c
@@ -46,6 +46,7 @@
 #include "lpgemm_packb_s8.h"
 #include "lpgemm_packb_s8s16.h"
 #include "lpgemm_pack_f32.h"
+#include "lpgemm_logger.h"
 
 static lpgemm_cntx_t global_cntx_t_list[AOCL_OPERATION_TYPE_LEN] \
 			__attribute__((aligned(64))); //Only one op type supported now.
diff --git a/addon/aocl_gemm/frame/logging/lpgemm_logger.c b/addon/aocl_gemm/frame/logging/lpgemm_logger.c
new file mode 100644
index 000000000..419cd9a85
--- /dev/null
+++ b/addon/aocl_gemm/frame/logging/lpgemm_logger.c
@@ -0,0 +1,339 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name(s) of the copyright holder(s) nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include "blis.h"
+#include "lpgemm_sys.h"
+#include "lpgemm_logger.h"
+#include "lpgemm_post_ops.h"
+#include "lpgemm_types.h"
+#include <string.h>
+
+#ifdef AOCL_LPGEMM_LOGGER_SUPPORT
+
+static bli_pthread_once_t once_check_lpgemm_logger_init = BLIS_PTHREAD_ONCE_INIT;
+
+static bool lpgemm_logger_enabled = FALSE;
+
+// Opens, in append mode, the log file named from lpgemm_getpid() and
+// lpgemm_gettid(); returns NULL when logging is off or fopen() fails.
+FILE* lpgemm_start_logger_fn(void)
+{
+	lpgemm_init_logger(); // Reads the runtime enable flag (once).
+	FILE* fd = NULL;
+	if ( lpgemm_logger_enabled == TRUE )
+	{
+		char log_file[255] = {0};
+		// Casts match %llu on every ABI; snprintf bounds the write.
+		snprintf( log_file, sizeof( log_file ), "%s_P%llu_T%llu%s",
+				AOCL_LPGEMM_LOG_FILE_PRFX,
+				( unsigned long long )lpgemm_getpid(),
+				( unsigned long long )lpgemm_gettid(),
+				AOCL_LPGEMM_LOG_FILE_EXT );
+		fd = fopen( log_file, "a" );
+	}
+	return fd;
+}
+
+// Flushes and closes fd; does nothing when logging is off or fd is NULL.
+void lpgemm_stop_logger_fn( FILE* fd )
+{
+	if ( ( fd == NULL ) || ( lpgemm_logger_enabled != TRUE ) ) return;
+
+	fflush( fd );
+	fclose( fd );
+}
+
+// Appends p_str at ops_str + ops_str_len, then advances ops_str_len.
+#define LPGEMM_POST_OPS_STR_COPY(ops_str, ops_str_len, p_str) \
+	do { \
+		const char* c_ops_str = ( p_str ); \
+		size_t c_ops_str_len = strlen( c_ops_str ); \
+		strcpy( ( ops_str ) + ( ops_str_len ), c_ops_str ); \
+		( ops_str_len ) += c_ops_str_len; \
+	} while ( 0 )
+
+// Summarises the pre-ops (group size, then the scale / zero-point kind of
+// each entry) into ops_str. No bound is checked on ops_str.
+static void lpgemm_get_pre_ops_str( aocl_post_op* post_ops, char* ops_str )
+{
+	if ( post_ops == NULL )
+	{
+		strcpy( ops_str, "none" );
+		return;
+	}
+	aocl_pre_op* pre_ops = post_ops->pre_ops;
+	if ( ( pre_ops == NULL ) || ( pre_ops->seq_length <= 0 ) )
+	{
+		strcpy( ops_str, "none" );
+		return;
+	}
+	if ( ( pre_ops->seq_length > AOCL_MAX_POST_OPS ) )
+	{
+		strcpy( ops_str, "ops over-limit" );
+		return;
+	}
+
+	size_t ops_str_len = 0;
+	char* delim_str = "#";
+	size_t delim_str_len = strlen( delim_str );
+	LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "group_sz=" );
+	int written = sprintf( ( ops_str + ops_str_len ), "%lld", ( long long )pre_ops->group_size );
+	if ( written > 0 )
+	{
+		ops_str_len += ( size_t )written; // Advance the offset, not the base.
+	}
+	strcpy( ops_str + ops_str_len, delim_str );
+	ops_str_len += delim_str_len;
+
+	for (dim_t i = 0; i < pre_ops->seq_length; ++i)
+	{
+		LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "scale=" );
+		if ( ( pre_ops->b_scl ) != NULL )
+		{
+			if ( ( pre_ops->b_scl + i )->scale_factor_len == 1 )
+			{
+				LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "scalar_scale_factor," );
+			}
+			else
+			{
+				LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "vector_scale_factor," );
+			}
+		}
+
+		if ( ( pre_ops->b_zp ) != NULL )
+		{
+			if ( ( pre_ops->b_zp + i )->zero_point_len == 1 )
+			{
+				LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "scalar_zero_point," );
+			}
+			else
+			{
+				LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "vector_zero_point," );
+			}
+		}
+
+		strcpy( ops_str + ops_str_len, delim_str );
+		ops_str_len += delim_str_len;
+	}
+}
+
+// Summarises the post-op sequence into ops_str as '#'-terminated entries.
+// The member arrays are NULL-checked here because the struct comes straight
+// from the caller; no length bound is applied to ops_str.
+static void lpgemm_get_post_ops_str( aocl_post_op* post_ops, char* ops_str )
+{
+	if ( ( post_ops == NULL ) || ( post_ops->seq_length <= 0 ) )
+	{
+		strcpy( ops_str, "none" );
+		return;
+	}
+	if ( ( post_ops->seq_length > AOCL_MAX_POST_OPS ) )
+	{
+		strcpy( ops_str, "ops over-limit" );
+		return;
+	}
+	if ( post_ops->seq_vector == NULL )
+	{
+		strcpy( ops_str, "invalid ops" );
+		return;
+	}
+
+	size_t ops_str_len = 0;
+	dim_t e_i = 0; // Multiple eltwise supported.
+	dim_t s_i = 0; // Multiple sum/scale supported.
+	char* delim_str = "#";
+	size_t delim_str_len = strlen( delim_str );
+	for ( dim_t i = 0; i < post_ops->seq_length; ++i )
+	{
+		// Dispatcher code
+		switch ( *( post_ops->seq_vector + i ) )
+		{
+			case ELTWISE:
+				{
+					LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "eltwise=");
+					if ( post_ops->eltwise == NULL )
+					{
+						break; // No array to read; entry stays "eltwise=".
+					}
+					// Eltwise algo dispatcher.
+					switch ( ( post_ops->eltwise + e_i )->algo.algo_type )
+					{
+						case RELU:
+							LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "relu");
+							break;
+						case PRELU:
+							LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "prelu" );
+							break;
+						case GELU_TANH:
+							LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "gelu_tanh" );
+							break;
+						case GELU_ERF:
+							LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "gelu_erf" );
+							break;
+						case CLIP:
+							LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "clip" );
+							break;
+						case SWISH:
+							LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "swish" );
+							break;
+						case TANH:
+							LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "tanh" );
+							break;
+						case SIGMOID:
+							LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "sigmoid" );
+							break;
+						default:
+							break;
+					}
+					e_i += 1;
+				}
+				break;
+			case BIAS:
+				{
+					LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "bias" );
+				}
+				break;
+			case SCALE:
+				{
+					LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "scale=" );
+					if ( post_ops->sum == NULL )
+					{
+						break; // No array to read; entry stays "scale=".
+					}
+					if ( ( post_ops->sum + s_i )->scale_factor_len == 1 )
+					{
+						LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "scalar_scale_factor," );
+					}
+					else
+					{
+						LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "vector_scale_factor," );
+					}
+
+					if ( ( post_ops->sum + s_i )->zero_point_len == 1 )
+					{
+						LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "scalar_zero_point," );
+					}
+					else
+					{
+						LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "vector_zero_point," );
+					}
+
+					s_i += 1;
+				}
+				break;
+			case MATRIX_ADD:
+				{
+					LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "mat_add" );
+				}
+				break;
+			case MATRIX_MUL:
+				{
+					LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "mat_mul" );
+				}
+				break;
+			default:
+				break;
+		}
+
+		strcpy( ops_str + ops_str_len, delim_str );
+		ops_str_len += delim_str_len;
+	}
+}
+
+// Writes one GEMM record (params, pre/post-op summary, alpha, beta) to fd.
+void lpgemm_write_logger_gemm_fn
+     (
+       FILE*         fd,
+       char*         op_type,
+       const char    order,
+       const char    transa,
+       const char    transb,
+       const dim_t   m,
+       const dim_t   n,
+       const dim_t   k,
+       const float   alpha,
+       const dim_t   lda,
+       const char    mem_format_a,
+       const dim_t   ldb,
+       const char    mem_format_b,
+       const float   beta,
+       const dim_t   ldc,
+       aocl_post_op* post_op_unparsed
+     )
+{
+	if ( ( lpgemm_logger_enabled == TRUE ) && ( fd != NULL ) )
+	{
+		char pre_ops_str[1024] = {0};
+		lpgemm_get_pre_ops_str( post_op_unparsed, pre_ops_str );
+		char post_ops_str[2048] = {0};
+		lpgemm_get_post_ops_str( post_op_unparsed, post_ops_str );
+		// Casts keep the arguments matched to %lld whatever type dim_t is.
+		fprintf( fd, "%c %c %c %c %c %lld %lld %lld %lld %lld %lld "\
+					"%s:pre_ops=[%s]:post_ops=[%s] %f %f ",
+				order, transa, transb, mem_format_a, mem_format_b,
+				( long long )m, ( long long )n, ( long long )k,
+				( long long )lda, ( long long )ldb, ( long long )ldc,
+				op_type, pre_ops_str, post_ops_str, alpha, beta );
+	}
+}
+
+// Writes stime followed by a newline to fd when logging is active.
+void lpgemm_write_logger_time_break_fn( FILE* fd, double stime )
+{
+	if ( ( lpgemm_logger_enabled != TRUE ) || ( fd == NULL ) ) return;
+
+	fprintf( fd, "%f \n", stime );
+}
+
+// Loads AOCL_ENABLE_LPGEMM_LOGGER (fallback FALSE) into the enable flag.
+void _lpgemm_init_logger()
+{
+	lpgemm_logger_enabled = bli_env_get_var( "AOCL_ENABLE_LPGEMM_LOGGER", FALSE );
+}
+
+// Runs _lpgemm_init_logger through bli_pthread_once.
+void lpgemm_init_logger()
+{
+	bli_pthread_once
+	(
+	  &once_check_lpgemm_logger_init, _lpgemm_init_logger
+	);
+}
+
+#else
+
+// Built without AOCL_LPGEMM_LOGGER_SUPPORT: initialisation is a no-op.
+void lpgemm_init_logger() {}
+
+#endif
diff --git a/addon/aocl_gemm/frame/logging/lpgemm_logger.h b/addon/aocl_gemm/frame/logging/lpgemm_logger.h
new file mode 100644
index 000000000..d908059b3
--- /dev/null
+++ b/addon/aocl_gemm/frame/logging/lpgemm_logger.h
@@ -0,0 +1,96 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name(s) of the copyright holder(s) nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef LPGEMM_LOGGER_H
+#define LPGEMM_LOGGER_H
+
+#ifdef AOCL_LPGEMM_LOGGER_SUPPORT
+
+#define AOCL_LPGEMM_LOG_FILE_PRFX "aocl_gemm_log"
+#define AOCL_LPGEMM_LOG_FILE_EXT ".txt"
+
+FILE* lpgemm_start_logger_fn(void);
+void lpgemm_stop_logger_fn( FILE* fd );
+void lpgemm_write_logger_gemm_fn
+     (
+       FILE*         fd,
+       char*         op_type,
+       const char    order,
+       const char    transa,
+       const char    transb,
+       const dim_t   m,
+       const dim_t   n,
+       const dim_t   k,
+       const float   alpha,
+       const dim_t   lda,
+       const char    mem_format_a,
+       const dim_t   ldb,
+       const char    mem_format_b,
+       const float   beta,
+       const dim_t   ldc,
+       aocl_post_op* post_op_unparsed
+     );
+void lpgemm_write_logger_time_break_fn( FILE* fd, double stime );
+
+#define LPGEMM_START_LOGGER() /* Declares fd and the start time in the caller's scope. */ \
+	FILE* fd = lpgemm_start_logger_fn(); /* NULL unless logging is enabled and fopen succeeds. */ \
+	double aocl_lpgemm_logger_start_time = bli_clock(); \
+
+#define LPGEMM_STOP_LOGGER() /* Uses fd and the start time declared by LPGEMM_START_LOGGER. */ \
+	double aocl_lpgemm_logger_stop_time = DBL_MAX; \
+	aocl_lpgemm_logger_stop_time = \
+			bli_clock_min_diff \
+			( \
+			  aocl_lpgemm_logger_stop_time, \
+			  aocl_lpgemm_logger_start_time \
+			); \
+	lpgemm_write_logger_time_break_fn( fd, aocl_lpgemm_logger_stop_time ); /* Time field + newline. */ \
+	lpgemm_stop_logger_fn( fd ); /* Flushes and closes the stream. */ \
+
+#define LPGEMM_WRITE_LOGGER(...) /* Forwards the arguments, prefixed with the caller's fd. */ \
+	lpgemm_write_logger_gemm_fn( fd, __VA_ARGS__ ); \
+
+#else
+
+#define LPGEMM_START_LOGGER(...)
+
+#define LPGEMM_STOP_LOGGER(...)
+
+#define LPGEMM_WRITE_LOGGER(...)
+
+#endif
+
+void lpgemm_init_logger();
+
+#endif //LPGEMM_LOGGER_H
diff --git a/addon/aocl_gemm/frame/sys_utils/lpgemm_sys.c b/addon/aocl_gemm/frame/sys_utils/lpgemm_sys.c
new file mode 100644
index 000000000..3e947a28c
--- /dev/null
+++ b/addon/aocl_gemm/frame/sys_utils/lpgemm_sys.c
@@ -0,0 +1,67 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name(s) of the copyright holder(s) nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include "blis.h"
+#include "lpgemm_sys.h"
+
+#if defined(__linux__)
+#include <sys/types.h>
+#include <unistd.h>
+#endif
+
+// Thread identifier used in the log file name. The value depends on the
+// threading backend selected at build time:
+//  - OpenMP: omp_get_thread_num().
+//  - pthreads (non-Windows): pthread_self(); anything else: 0.
+// NOTE(review): omp_get_thread_num() is presumably 0 outside a parallel
+// region, so distinct caller threads may share one file -- confirm.
+uint64_t lpgemm_gettid( void )
+{
+#if defined( BLIS_ENABLE_OPENMP )
+	return ( uint64_t )omp_get_thread_num();
+#elif defined( BLIS_ENABLE_PTHREADS ) && !defined( _WIN32 )
+	return ( uint64_t ) pthread_self();
+#else
+	return 0;
+#endif
+}
+
+uint64_t lpgemm_getpid( void )
+{
+#if !defined(__linux__)
+	return 0; // getpid() is only used on Linux builds.
+#else
+	return ( uint64_t ) getpid();
+#endif
+}
diff --git a/addon/aocl_gemm/frame/sys_utils/lpgemm_sys.h b/addon/aocl_gemm/frame/sys_utils/lpgemm_sys.h
new file mode 100644
index 000000000..2e9e55c10
--- /dev/null
+++ b/addon/aocl_gemm/frame/sys_utils/lpgemm_sys.h
@@ -0,0 +1,41 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name(s) of the copyright holder(s) nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#ifndef LPGEMM_SYS_UTILS_H
+#define LPGEMM_SYS_UTILS_H
+#include <stdint.h> // uint64_t; lets this header be included on its own.
+
+uint64_t lpgemm_gettid( void ); // Calling thread's id, or 0 when none is available.
+uint64_t lpgemm_getpid( void ); // Calling process's id, or 0 when none is available.
+#endif //LPGEMM_SYS_UTILS_H
diff --git a/bench/bench_aocl_gemm/bench_input.txt b/bench/bench_aocl_gemm/bench_input.txt
index 4e3955d46..d758cd6f2 100644
--- a/bench/bench_aocl_gemm/bench_input.txt
+++ b/bench/bench_aocl_gemm/bench_input.txt
@@ -1,5 +1,4 @@
 c n t n n 32 128 2 32 128 32 bf16bf16f32of32:bias=na,swish
-#
 r n n n r 6 1 4 4 16 16 bf16s4f32of32:pre_op_scale=scalar,pre_op_scale_type=bf16,group_size=2
 r n n n r 6 1 4 4 16 16 bf16s4f32of32:pre_op_zp=vector,pre_op_scale=scalar,pre_op_scale_type=bf16,group_size=2
 r n n n r 6 1 4 4 16 16 bf16s4f32of32:pre_op_zp=scalar,pre_op_scale=scalar,pre_op_scale_type=bf16,group_size=2
diff --git a/bench/bench_aocl_gemm/bench_lpgemm.c b/bench/bench_aocl_gemm/bench_lpgemm.c
index a441f5bae..e39a5f789 100644
--- a/bench/bench_aocl_gemm/bench_lpgemm.c
+++ b/bench/bench_aocl_gemm/bench_lpgemm.c
@@ -1046,7 +1046,6 @@ GEN_MAT_MUL_ACC_CHK_DRV_FUNC(int8_t,int8_t,int8_t,int16_t,float,s8s8s16os8,s8s8s
 GEN_MAT_MUL_ACC_CHK_DRV_FUNC(bfloat16,int8_t,float,float,float,bf16s4f32of32,bf16bf16f32obf16)
 GEN_MAT_MUL_ACC_CHK_DRV_FUNC(bfloat16,int8_t,bfloat16,float,float,bf16s4f32obf16,bf16bf16f32obf16)
 
-
 GEN_MAT_MUL_POST_OPS_CREATOR(int8_t,int16_t,float,int16_t,u8s8s16os16)
 GEN_MAT_MUL_POST_OPS_CREATOR(int8_t,int32_t,float,int32_t,u8s8s32os32)
 GEN_MAT_MUL_POST_OPS_CREATOR(bfloat16,float,float,bfloat16,bf16bf16f32of32)
@@ -1431,6 +1430,8 @@ int main( int argc, char** argv )
                 strncpy( post_ops_str_dest, post_ops_str, POST_OPS_STR_LEN );
                 global_dscale_out = 'n';
                 global_pre_op = 'n';
+                DSCALE_CLIP_MIN = INT_MIN;
+                DSCALE_CLIP_MAX = INT_MAX;
                 GEN_FUNC_NAME(mat_mul_bench_main_,u8s8s32os32)
                 (
                   fin, fout, stor_order, transa, transb, op_a, op_b,
@@ -1462,6 +1463,8 @@ int main( int argc, char** argv )
                 strncpy( post_ops_str_dest, post_ops_str, POST_OPS_STR_LEN );
                 global_dscale_out = 'n';
                 global_pre_op = 'n';
+                DSCALE_CLIP_MIN = INT_MIN;
+                DSCALE_CLIP_MAX = INT_MAX;
 
                 if ( ( op_b != 'r' ) && ( op_b != 'R' ) )
                 {
@@ -1492,12 +1495,15 @@ int main( int argc, char** argv )
                   post_ops_str_dest, FALSE
                 );
             }
+#if 0
             if ( ( strcmp( gemm_type_str, "u8s8s16os16" ) == 0 ) ||
                  ( strcmp( gemm_type_str, "*" ) == 0 ) )
             {
                 strncpy( post_ops_str_dest, post_ops_str, POST_OPS_STR_LEN );
                 global_dscale_out = 'n';
                 global_pre_op = 'n';
+                DSCALE_CLIP_MIN = SHRT_MIN;
+                DSCALE_CLIP_MAX = SHRT_MAX;
                 GEN_FUNC_NAME(mat_mul_bench_main_,u8s8s16os16)
                 (
                     fin, fout, stor_order, transa, transb, op_a, op_b,
@@ -1535,6 +1541,7 @@ int main( int argc, char** argv )
                     post_ops_str_dest, FALSE
                 );
             }
+#endif
             if ( ( strcmp( gemm_type_str, "bf16bf16f32of32" ) == 0 ) ||
                  ( strcmp( gemm_type_str, "*" ) == 0 ) )
             {
@@ -1609,6 +1616,8 @@ int main( int argc, char** argv )
                 strncpy( post_ops_str_dest, post_ops_str, POST_OPS_STR_LEN );
                 global_dscale_out = 'n';
                 global_pre_op = 'n';
+                DSCALE_CLIP_MIN = INT_MIN;
+                DSCALE_CLIP_MAX = INT_MAX;
                 GEN_FUNC_NAME(mat_mul_bench_main_,s8s8s32os32)
                 (
                   fin, fout, stor_order, transa, transb, op_a, op_b,
@@ -1631,12 +1640,15 @@ int main( int argc, char** argv )
                   post_ops_str_dest, FALSE
                 );
             }
+#if 0
             if ( ( strcmp( gemm_type_str, "s8s8s16os16" ) == 0 ) ||
                  ( strcmp( gemm_type_str, "*" ) == 0 ) )
             {
                 strncpy( post_ops_str_dest, post_ops_str, POST_OPS_STR_LEN );
                 global_dscale_out = 'n';
                 global_pre_op = 'n';
+                DSCALE_CLIP_MIN = SHRT_MIN;
+                DSCALE_CLIP_MAX = SHRT_MAX;
                 GEN_FUNC_NAME(mat_mul_bench_main_,s8s8s16os16)
                 (
                   fin, fout, stor_order, transa, transb, op_a, op_b,
@@ -1659,6 +1671,7 @@ int main( int argc, char** argv )
                   post_ops_str_dest, FALSE
                 );
             }
+#endif
         }
     }
 
diff --git a/bench/bench_aocl_gemm/bench_lpgemm_helpers.h b/bench/bench_aocl_gemm/bench_lpgemm_helpers.h
index 23efacb14..207c0a16b 100644
--- a/bench/bench_aocl_gemm/bench_lpgemm_helpers.h
+++ b/bench/bench_aocl_gemm/bench_lpgemm_helpers.h
@@ -43,13 +43,14 @@
 #include <float.h>
 #include <math.h>
 #include <omp.h>
+#include <limits.h>
 
 #include "blis.h"
 
 // Used to clip downscaled output, will be set in the main loop based
 // on the accumulation and C data type.
-int64_t DSCALE_CLIP_MIN = 0;
-int64_t DSCALE_CLIP_MAX = 0;
+int64_t DSCALE_CLIP_MIN = INT_MIN;
+int64_t DSCALE_CLIP_MAX = INT_MAX;
 
 // Mode can be one of the follwoing:
 // 1. p - performance, used for benchmarks.
@@ -434,16 +435,16 @@ static inline void mat_mul_get_output_type_valfloatbfloat16
        float* temp_accum
      )
 {
-	/* Fix for rounding bias. */
-	uint32_t inter_temp;
-	memcpy( &inter_temp, temp_accum, sizeof( float ) );
+    /* Fix for rounding bias. */
+    uint32_t inter_temp;
+    memcpy( &inter_temp, temp_accum, sizeof( float ) );
 
-	/* Check if 16th bit is set */
-	uint32_t tlsb = ( inter_temp & ( uint32_t )0x00010000 ) > 16;
+    /* Check if 16th bit is set */
+    uint32_t tlsb = ( inter_temp & ( uint32_t )0x00010000 ) > 16;
 
-	/* Adding rounding bias. */
-	uint32_t rounded = inter_temp + ( uint32_t )0x00007FFF + tlsb;
-	memcpy( temp_accum, &rounded, sizeof( float ) );
+    /* Adding rounding bias. */
+    uint32_t rounded = inter_temp + ( uint32_t )0x00007FFF + tlsb;
+    memcpy( temp_accum, &rounded, sizeof( float ) );
 
     float_to_bf16( temp_accum, out_temp_accum );
 }
@@ -772,6 +773,16 @@ static inline aocl_post_op* lpgemm_create_post_ops_struct_ ## BLAS_SFX \
                     is_scalar_scale = TRUE; \
                 } \
             } \
+            else if ( strcmp( ops_tok, "zp" ) == 0 ) \
+            { \
+                ops_tok = strtok( NULL, ", " ); \
+                str_tolower( ops_tok ); \
+                if ( ( strcmp( ops_tok, "scalar" ) == 0 ) || \
+                     ( strcmp( ops_tok, "s" ) == 0 ) ) \
+                { \
+                    is_scalar_zp = TRUE; \
+                } \
+            } \
             else if ( strcmp( ops_tok, "matrix_add" ) == 0 ) \
             { \
                 post_ops->seq_vector[cur_op_index] = MATRIX_ADD; \
@@ -812,15 +823,15 @@ static inline aocl_post_op* lpgemm_create_post_ops_struct_ ## BLAS_SFX \
             } \
             else if ( strcmp( ops_tok, "pre_op_zp" ) == 0 ) \
             { \
-               ops_tok = strtok( NULL, ", " ); \
+                ops_tok = strtok( NULL, ", " ); \
                 str_tolower( ops_tok ); \
                 if ( ( strcmp( ops_tok, "scalar" ) == 0 ) || \
                      ( strcmp( ops_tok, "s" ) == 0 ) ) \
                 { \
                     /* set scalar zp */\
                     zp_vec_length = 1; \
-                }else if ( ( strcmp( ops_tok, "vector" ) == 0 ) || \
-                           ( strcmp( ops_tok, "v" ) == 0 ) ) \
+                } \
+                else \
                 { \
                     /* set vector zp */\
                     zp_vec_length = n; \
@@ -835,8 +846,8 @@ static inline aocl_post_op* lpgemm_create_post_ops_struct_ ## BLAS_SFX \
                 { \
                     /* set scalar scale */\
                     is_pre_op_scale_scalar = TRUE; \
-                }else if ( ( strcmp( ops_tok, "vector" ) == 0 ) || \
-                           ( strcmp( ops_tok, "v" ) == 0 ) ) \
+                } \
+                else \
                 { \
                     /* set vector scale */\
                     is_pre_op_scale_scalar = FALSE; \
@@ -1144,10 +1155,11 @@ static inline aocl_post_op* lpgemm_create_post_ops_struct_ ## BLAS_SFX \
         if ( post_ops->pre_ops == NULL ) { goto err_handler; } \
 \
         dim_t num_groups = 1; \
-        if(quant_group_size == 0) \
+        if (quant_group_size == 0) \
         { \
             post_ops->pre_ops->group_size = k; \
-        }else \
+        } \
+        else \
         { \
             post_ops->pre_ops->group_size = quant_group_size; \
             if(is_group_quant) \
@@ -1157,6 +1169,10 @@ static inline aocl_post_op* lpgemm_create_post_ops_struct_ ## BLAS_SFX \
         } \
 \
         ( post_ops->pre_ops )->b_zp = NULL; \
+        if ( zp_vec_length == 0 ) \
+        { \
+            zp_vec_length = n; \
+        } \
         if( zp_vec_length != 0 ) \
         { \
             ( post_ops->pre_ops )->b_zp = malloc( sizeof( aocl_pre_op_zp ) ); \