diff --git a/addon/aocl_gemm/aocl_gemm_bf16bf16f32obf16.c b/addon/aocl_gemm/aocl_gemm_bf16bf16f32obf16.c index d182f529c..ae9e877f8 100644 --- a/addon/aocl_gemm/aocl_gemm_bf16bf16f32obf16.c +++ b/addon/aocl_gemm/aocl_gemm_bf16bf16f32obf16.c @@ -41,9 +41,23 @@ #include "lpgemm_5loop_interface_apis.h" #include "lpgemm_config.h" #include "lpgemm_utils.h" +#include "lpgemm_logger.h" AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16) { + LPGEMM_START_LOGGER(); + LPGEMM_WRITE_LOGGER \ + ( + "bf16bf16f32obf16", \ + order, transa, transb, \ + m, n, k, \ + ( ( float ) alpha ), \ + lda, mem_format_a, \ + ldb, mem_format_b, \ + ( ( float ) beta ), \ + ldc, post_op_unparsed \ + ); + trans_t blis_transa; trans_t blis_transb; @@ -52,7 +66,7 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16) { bli_print_msg(" AVX512_BF16 ISA not supported by processor, " "cannot perform bf16bf16f32 gemm.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } /* Initialize BLIS. */ @@ -62,6 +76,7 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16) aocl_lpgemm_init_global_cntx(); // check for validity of params. + int err_no = 0; AOCL_GEMM_CHECK ( "bf16bf16f32obf16", @@ -69,8 +84,13 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16) m, n, k, a, lda, mem_format_a, b, ldb, mem_format_b, - c, ldc + c, ldc, + err_no ); + if ( err_no != 0 ) + { + goto err_hndl; + } #ifdef LPGEMM_BF16_JIT if( jit_kernels_generated == FALSE ) @@ -119,14 +139,14 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16) if( ( is_row_major == TRUE ) && ( mtag_a == REORDERED ) ) { bli_print_msg(" Reordering of A matrix is not supported in row major case.", __FILE__, __LINE__ ); - return; + goto err_hndl; } // Inputs swapped in column major, A becomes B from kernel point of view. // Reorder is not supported for column major matrices. else if ( ( is_column_major == TRUE ) && ( ( mtag_b == REORDERED ) || ( mtag_a == REORDERED ) ) ) { bli_print_msg(" Reordering of column major matrices is not supported.", __FILE__, __LINE__ ); - return; + goto err_hndl; } // From 5-loop function point of view, @@ -166,7 +186,10 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16) m, n ); - if( err != BLIS_SUCCESS ) return; + if( err != BLIS_SUCCESS ) + { + goto err_hndl; + } // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. @@ -233,4 +256,7 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,bfloat16,float,bf16bf16f32obf16) ); } #endif + +err_hndl:; + LPGEMM_STOP_LOGGER(); } diff --git a/addon/aocl_gemm/aocl_gemm_bf16bf16f32of32.c b/addon/aocl_gemm/aocl_gemm_bf16bf16f32of32.c index 0930fb5d1..dc591ac15 100644 --- a/addon/aocl_gemm/aocl_gemm_bf16bf16f32of32.c +++ b/addon/aocl_gemm/aocl_gemm_bf16bf16f32of32.c @@ -41,9 +41,23 @@ #include "lpgemm_5loop_interface_apis.h" #include "lpgemm_config.h" #include "lpgemm_utils.h" +#include "lpgemm_logger.h" AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32) { + LPGEMM_START_LOGGER(); + LPGEMM_WRITE_LOGGER \ + ( + "bf16bf16f32of32", \ + order, transa, transb, \ + m, n, k, \ + ( ( float ) alpha ), \ + lda, mem_format_a, \ + ldb, mem_format_b, \ + ( ( float ) beta ), \ + ldc, post_op_unparsed \ + ); + trans_t blis_transa; trans_t blis_transb; @@ -52,7 +66,7 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32) { bli_print_msg(" AVX512_BF16 ISA not supported by processor, " "cannot perform bf16bf16f32 gemm.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } /* Initialize BLIS. */ @@ -61,7 +75,8 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32) // Set MC, NC, KC, NR, MR. aocl_lpgemm_init_global_cntx(); -// check for validity of params. + // check for validity of params. + int err_no = 0; AOCL_GEMM_CHECK ( "bf16bf16f32of32", @@ -69,8 +84,13 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32) m, n, k, a, lda, mem_format_a, b, ldb, mem_format_b, - c, ldc + c, ldc, + err_no ); + if ( err_no != 0 ) + { + goto err_hndl; + } #ifdef LPGEMM_BF16_JIT if( jit_kernels_generated == FALSE ) @@ -120,14 +140,14 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32) if( ( is_row_major == TRUE ) && ( mtag_a == REORDERED ) ) { bli_print_msg(" Reordering of A matrix is not supported in row major case.", __FILE__, __LINE__ ); - return; + goto err_hndl; } // Inputs swapped in column major, A becomes B from kernel point of view. // Reorder is not supported for column major matrices. else if ( ( is_column_major == TRUE ) && ( ( mtag_b == REORDERED ) || ( mtag_a == REORDERED ) ) ) { bli_print_msg(" Reordering of column major matrices is not supported.", __FILE__, __LINE__ ); - return; + goto err_hndl; } // From 5-loop function point of view @@ -167,7 +187,10 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32) m, n ); - if( err != BLIS_SUCCESS ) return; + if( err != BLIS_SUCCESS ) + { + goto err_hndl; + } // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. @@ -234,4 +257,7 @@ AOCL_GEMM_MATMUL(bfloat16,bfloat16,float,float,bf16bf16f32of32) ); } #endif + +err_hndl:; + LPGEMM_STOP_LOGGER(); } diff --git a/addon/aocl_gemm/aocl_gemm_bf16s4f32of32.c b/addon/aocl_gemm/aocl_gemm_bf16s4f32of32.c index a35cb665b..7891fd4cd 100644 --- a/addon/aocl_gemm/aocl_gemm_bf16s4f32of32.c +++ b/addon/aocl_gemm/aocl_gemm_bf16s4f32of32.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2024 - 2025, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -41,9 +41,23 @@ #include "lpgemm_5loop_interface_apis.h" #include "lpgemm_config.h" #include "lpgemm_utils.h" +#include "lpgemm_logger.h" AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32) { + LPGEMM_START_LOGGER(); + LPGEMM_WRITE_LOGGER \ + ( + "bf16s4f32of32", \ + order, transa, transb, \ + m, n, k, \ + ( ( float ) alpha ), \ + lda, mem_format_a, \ + ldb, mem_format_b, \ + ( ( float ) beta ), \ + ldc, post_op_unparsed \ + ); + trans_t blis_transa; trans_t blis_transb; @@ -53,7 +67,7 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32) bli_print_msg(" AVX512_BF16 ISA not supported by processor, " "cannot perform bf16bf16f32 gemm.", __FILE__, __LINE__); - return; // Error. + goto err_hndl; } /* Initialize BLIS. */ @@ -63,13 +77,18 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32) aocl_lpgemm_init_global_cntx(); // check for validity of params. + int err_no = 0; AOCL_GEMM_CHECK( "bf16s4f32of32", order, transa, transb, m, n, k, a, lda, mem_format_a, b, ldb, mem_format_b, - c, ldc); + c, ldc, err_no); + if ( err_no != 0 ) + { + goto err_hndl; + } /* Map BLAS chars to their corresponding BLIS enumerated type value. */ bli_param_map_netlib_to_blis_trans(transa, &blis_transa); @@ -108,14 +127,14 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32) if ((is_row_major == TRUE) && (mtag_a == REORDERED)) { bli_print_msg(" Reordering of A matrix is not supported in row major case.", __FILE__, __LINE__); - return; + goto err_hndl; } // Inputs swapped in column major, A becomes B from kernel point of view. // Reorder is not supported for column major matrices. else if ((is_column_major == TRUE) && ((mtag_b == REORDERED) || (mtag_a == REORDERED))) { bli_print_msg(" Reordering of column major matrices is not supported.", __FILE__, __LINE__); - return; + goto err_hndl; } // From 5-loop function point of view @@ -155,7 +174,9 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32) m, n, k ); if (err != BLIS_SUCCESS) - return; + { + goto err_hndl; + } // Convert post op struct to post op linked list format. lpgemm_post_op post_op_list[AOCL_MAX_POST_OPS]; @@ -167,7 +188,9 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32) m, n ); if (err != BLIS_SUCCESS) - return; + { + goto err_hndl; + } // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. @@ -183,7 +206,7 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32) { // Swapping inputs not possible in case of mixed precision. bli_print_msg(" column major not supported yet in bf16s4f32o.", __FILE__, __LINE__); - return; + goto err_hndl; } else { @@ -204,7 +227,7 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32) { // Swapping inputs not possible in case of mixed precision. bli_print_msg(" column major not supported yet in bf16s4f32o.", __FILE__, __LINE__); - return; + goto err_hndl; } else { @@ -220,10 +243,26 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, float, float, bf16s4f32of32) ); } #endif + +err_hndl:; + LPGEMM_STOP_LOGGER(); } AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16) { + LPGEMM_START_LOGGER(); + LPGEMM_WRITE_LOGGER \ + ( + "bf16s4f32obf16", \ + order, transa, transb, \ + m, n, k, \ + ( ( float ) alpha ), \ + lda, mem_format_a, \ + ldb, mem_format_b, \ + ( ( float ) beta ), \ + ldc, post_op_unparsed \ + ); + trans_t blis_transa; trans_t blis_transb; @@ -233,7 +272,7 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16) bli_print_msg(" AVX512_BF16 ISA not supported by processor, " "cannot perform bf16bf16f32 gemm.", __FILE__, __LINE__); - return; // Error. + goto err_hndl; } /* Initialize BLIS. */ @@ -243,13 +282,18 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16) aocl_lpgemm_init_global_cntx(); // check for validity of params. + int err_no = 0; AOCL_GEMM_CHECK( "bf16s4f32obf16", order, transa, transb, m, n, k, a, lda, mem_format_a, b, ldb, mem_format_b, - c, ldc); + c, ldc, err_no); + if ( err_no != 0 ) + { + goto err_hndl; + } /* Map BLAS chars to their corresponding BLIS enumerated type value. */ bli_param_map_netlib_to_blis_trans(transa, &blis_transa); @@ -289,14 +333,14 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16) if ((is_row_major == TRUE) && (mtag_a == REORDERED)) { bli_print_msg(" Reordering of A matrix is not supported in row major case.", __FILE__, __LINE__); - return; + goto err_hndl; } // Inputs swapped in column major, A becomes B from kernel point of view. // Reorder is not supported for column major matrices. else if ((is_column_major == TRUE) && ((mtag_b == REORDERED) || (mtag_a == REORDERED))) { bli_print_msg(" Reordering of column major matrices is not supported.", __FILE__, __LINE__); - return; + goto err_hndl; } // From 5-loop function point of view @@ -334,7 +378,9 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16) m, n, k); if (err != BLIS_SUCCESS) - return; + { + goto err_hndl; + } // Convert post op struct to post op linked list format. lpgemm_post_op post_op_list[AOCL_MAX_POST_OPS]; @@ -344,7 +390,9 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16) m, n); if (err != BLIS_SUCCESS) - return; + { + goto err_hndl; + } // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. @@ -360,7 +408,7 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16) { // Swapping inputs not possible in case of mixed precision. bli_print_msg(" column major not supported yet in bf16s4f32o.", __FILE__, __LINE__); - return; + goto err_hndl; } else { @@ -381,7 +429,7 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16) { // Swapping inputs not possible in case of mixed precision. bli_print_msg(" column major not supported yet in bf16s4f32o.", __FILE__, __LINE__); - return; + goto err_hndl; } else { @@ -395,4 +443,7 @@ AOCL_GEMM_MATMUL(bfloat16, int8_t, bfloat16, float, bf16s4f32obf16) post_op_list, BF16); } #endif + +err_hndl:; + LPGEMM_STOP_LOGGER(); } diff --git a/addon/aocl_gemm/aocl_gemm_check.h b/addon/aocl_gemm/aocl_gemm_check.h index c7f610f35..60bec8bb4 100644 --- a/addon/aocl_gemm/aocl_gemm_check.h +++ b/addon/aocl_gemm/aocl_gemm_check.h @@ -37,7 +37,8 @@ m, n, k, \ a, lda, mtag_a, \ b, ldb, mtag_b, \ - c, ldc \ + c, ldc, \ + err_no \ ) \ { \ int32_t info = 0; \ @@ -98,7 +99,7 @@ \ sprintf( print_msg, "** On entry to %6s, parameter number %2i had an illegal value", op_str, info); \ bli_print_msg(print_msg, __FILE__, __LINE__); \ - return; \ + err_no = info; \ } \ } diff --git a/addon/aocl_gemm/aocl_gemm_f32f32f32of32.c b/addon/aocl_gemm/aocl_gemm_f32f32f32of32.c index e3db6e386..84dd229af 100644 --- a/addon/aocl_gemm/aocl_gemm_f32f32f32of32.c +++ b/addon/aocl_gemm/aocl_gemm_f32f32f32of32.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2022 - 2023, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -41,9 +41,23 @@ #include "lpgemm_config.h" #include "lpgemm_utils.h" #include "lpgemm_5loop_interface_apis.h" +#include "lpgemm_logger.h" AOCL_GEMM_MATMUL(float,float,float,float,f32f32f32of32) { + LPGEMM_START_LOGGER(); + LPGEMM_WRITE_LOGGER \ + ( + "f32f32f32of32", \ + order, transa, transb, \ + m, n, k, \ + ( ( float ) alpha ), \ + lda, mem_format_a, \ + ldb, mem_format_b, \ + ( ( float ) beta ), \ + ldc, post_op_unparsed \ + ); + trans_t blis_transa; trans_t blis_transb; @@ -52,7 +66,7 @@ AOCL_GEMM_MATMUL(float,float,float,float,f32f32f32of32) { bli_print_msg(" AVX2 ISA not supported by processor, " "cannot perform f32f32f32 gemm.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } /* Initialize BLIS. */ @@ -61,11 +75,8 @@ AOCL_GEMM_MATMUL(float,float,float,float,f32f32f32of32) // Initialize lpgemm context. aocl_lpgemm_init_global_cntx(); - AOCL_DTL_TRACE_ENTRY(AOCL_DTL_LEVEL_TRACE_1); - AOCL_DTL_LOG_GEMM_INPUTS(AOCL_DTL_LEVEL_TRACE_1, *MKSTR(s), transa, transb, m, n, k,\ - (void*)&alpha, lda, ldb, (void*)&beta, ldc); - // check for validity of params. + int err_no = 0; AOCL_GEMM_CHECK ( "f32f32f32of32", @@ -73,8 +84,13 @@ AOCL_GEMM_MATMUL(float,float,float,float,f32f32f32of32) m, n, k, a, lda, mem_format_a, b, ldb, mem_format_b, - c, ldc + c, ldc, + err_no ); + if ( err_no != 0 ) + { + goto err_hndl; + } /* Map BLAS chars to their corresponding BLIS enumerated type value. */ bli_param_map_netlib_to_blis_trans( transa, &blis_transa ); @@ -113,7 +129,7 @@ AOCL_GEMM_MATMUL(float,float,float,float,f32f32f32of32) if ( ( is_row_major == TRUE ) && ( mtag_a == REORDERED ) ) { bli_print_msg(" Reordering of A matrix is not supported.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } // Inputs swapped in column major, A becomes B from kernel point of view. @@ -121,7 +137,7 @@ AOCL_GEMM_MATMUL(float,float,float,float,f32f32f32of32) { bli_print_msg(" Reordering of column major matrices is not supported.", __FILE__, __LINE__ ); - return; //Error + goto err_hndl; } // By default enable packing for B matrix. Before the 5 loop, based on @@ -159,7 +175,10 @@ AOCL_GEMM_MATMUL(float,float,float,float,f32f32f32of32) m, n ); - if( err != BLIS_SUCCESS ) return; + if( err != BLIS_SUCCESS ) + { + goto err_hndl; + } // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. @@ -233,5 +252,6 @@ AOCL_GEMM_MATMUL(float,float,float,float,f32f32f32of32) } #endif - AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); +err_hndl:; + LPGEMM_STOP_LOGGER(); } diff --git a/addon/aocl_gemm/aocl_gemm_s8s8s16os16.c b/addon/aocl_gemm/aocl_gemm_s8s8s16os16.c index 2f73fcf42..80eae4db0 100644 --- a/addon/aocl_gemm/aocl_gemm_s8s8s16os16.c +++ b/addon/aocl_gemm/aocl_gemm_s8s8s16os16.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2023 - 2025, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -41,9 +41,23 @@ #include "lpgemm_thread_decor_openmp.h" #include "lpgemm_post_ops.h" #include "lpgemm_utils_s8.h" +#include "lpgemm_logger.h" AOCL_GEMM_MATMUL(int8_t,int8_t,int16_t,int16_t,s8s8s16os16) { + LPGEMM_START_LOGGER(); + LPGEMM_WRITE_LOGGER \ + ( + "s8s8s16os16", \ + order, transa, transb, \ + m, n, k, \ + ( ( float ) alpha ), \ + lda, mem_format_a, \ + ldb, mem_format_b, \ + ( ( float ) beta ), \ + ldc, post_op_unparsed \ + ); + trans_t blis_transa; trans_t blis_transb; @@ -52,7 +66,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int16_t,int16_t,s8s8s16os16) { bli_print_msg(" AVX2 ISA not supported by processor, " "cannot perform s8s8s16 gemm.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } /* Initialize BLIS. */ @@ -62,6 +76,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int16_t,int16_t,s8s8s16os16) aocl_lpgemm_init_global_cntx(); // check for validity of params. + int err_no = 0; AOCL_GEMM_CHECK ( "s8s8s16os16", @@ -69,8 +84,13 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int16_t,int16_t,s8s8s16os16) m, n, k, a, lda, mem_format_a, b, ldb, mem_format_b, - c, ldc + c, ldc, + err_no ); + if ( err_no != 0 ) + { + goto err_hndl; + } /* Map BLAS chars to their corresponding BLIS enumerated type value. */ bli_param_map_netlib_to_blis_trans(transa, &blis_transa); @@ -81,13 +101,13 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int16_t,int16_t,s8s8s16os16) if ( ( blis_transb != BLIS_NO_TRANSPOSE ) ) { bli_print_msg(" Transpose of B matrices is not supported.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } if ( ( order != 'r' ) && ( order != 'R' ) ) { bli_print_msg(" Operation only supports row-major matrices.", __FILE__, __LINE__ ); - return; // Only row major supported. + goto err_hndl; } inc_t rs_a = lda; @@ -126,7 +146,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int16_t,int16_t,s8s8s16os16) if ( !( bli_is_trans( blis_transa ) ) && ( mtag_a != UNPACKED ) ) { bli_print_msg(" A matrix needs to be unpacked.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } // Convert post op struct to post op linked list format. @@ -138,7 +158,10 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int16_t,int16_t,s8s8s16os16) m, n ); - if( err != BLIS_SUCCESS ) return; + if( err != BLIS_SUCCESS ) + { + goto err_hndl; + } // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. @@ -171,4 +194,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int16_t,int16_t,s8s8s16os16) post_op_list, S16 ); #endif + +err_hndl:; + LPGEMM_STOP_LOGGER(); } diff --git a/addon/aocl_gemm/aocl_gemm_s8s8s16os8.c b/addon/aocl_gemm/aocl_gemm_s8s8s16os8.c index 19bbfff7b..501adc148 100644 --- a/addon/aocl_gemm/aocl_gemm_s8s8s16os8.c +++ b/addon/aocl_gemm/aocl_gemm_s8s8s16os8.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2023 - 2025, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -41,9 +41,23 @@ #include "lpgemm_thread_decor_openmp.h" #include "lpgemm_post_ops.h" #include "lpgemm_utils_s8.h" +#include "lpgemm_logger.h" AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int16_t,s8s8s16os8) { + LPGEMM_START_LOGGER(); + LPGEMM_WRITE_LOGGER \ + ( + "s8s8s16os8", \ + order, transa, transb, \ + m, n, k, \ + ( ( float ) alpha ), \ + lda, mem_format_a, \ + ldb, mem_format_b, \ + ( ( float ) beta ), \ + ldc, post_op_unparsed \ + ); + trans_t blis_transa; trans_t blis_transb; @@ -52,7 +66,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int16_t,s8s8s16os8) { bli_print_msg(" AVX2 ISA not supported by processor, " "cannot perform s8s8s16 gemm.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } /* Initialize BLIS. */ @@ -62,6 +76,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int16_t,s8s8s16os8) aocl_lpgemm_init_global_cntx(); // check for validity of params. + int err_no = 0; AOCL_GEMM_CHECK ( "s8s8s16os8", @@ -69,8 +84,13 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int16_t,s8s8s16os8) m, n, k, a, lda, mem_format_a, b, ldb, mem_format_b, - c, ldc + c, ldc, + err_no ); + if ( err_no != 0 ) + { + goto err_hndl; + } /* Map BLAS chars to their corresponding BLIS enumerated type value. */ bli_param_map_netlib_to_blis_trans(transa, &blis_transa); @@ -81,13 +101,13 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int16_t,s8s8s16os8) if ( ( blis_transb != BLIS_NO_TRANSPOSE ) ) { bli_print_msg(" Transpose of B matrices is not supported.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } if ( ( order != 'r' ) && ( order != 'R' ) ) { bli_print_msg(" Operation only supports row-major matrices.", __FILE__, __LINE__ ); - return; // Only row major supported. + goto err_hndl; } inc_t rs_a = lda; @@ -126,7 +146,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int16_t,s8s8s16os8) if ( !( bli_is_trans( blis_transa ) ) && ( mtag_a != UNPACKED ) ) { bli_print_msg(" A matrix needs to be unpacked.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } // Convert post op struct to post op linked list format. @@ -138,7 +158,10 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int16_t,s8s8s16os8) m, n ); - if( err != BLIS_SUCCESS ) return; + if( err != BLIS_SUCCESS ) + { + goto err_hndl; + } // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. @@ -171,4 +194,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int16_t,s8s8s16os8) post_op_list, S8 ); #endif + +err_hndl:; + LPGEMM_STOP_LOGGER(); } diff --git a/addon/aocl_gemm/aocl_gemm_s8s8s32os32.c b/addon/aocl_gemm/aocl_gemm_s8s8s32os32.c index 747f9155e..b3d28a1d6 100644 --- a/addon/aocl_gemm/aocl_gemm_s8s8s32os32.c +++ b/addon/aocl_gemm/aocl_gemm_s8s8s32os32.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2023 - 2025, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -41,9 +41,23 @@ #include "lpgemm_5loop_interface_apis.h" #include "lpgemm_config.h" #include "lpgemm_utils_s8.h" +#include "lpgemm_logger.h" AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32) { + LPGEMM_START_LOGGER(); + LPGEMM_WRITE_LOGGER \ + ( + "s8s8s32os32", \ + order, transa, transb, \ + m, n, k, \ + ( ( float ) alpha ), \ + lda, mem_format_a, \ + ldb, mem_format_b, \ + ( ( float ) beta ), \ + ldc, post_op_unparsed \ + ); + trans_t blis_transa; trans_t blis_transb; @@ -52,7 +66,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32) { bli_print_msg(" AVX512_VNNI ISA not supported by processor, " "cannot perform s8s8s32 gemm.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } /* Initialize BLIS. */ @@ -62,6 +76,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32) aocl_lpgemm_init_global_cntx(); // check for validity of params. + int err_no = 0; AOCL_GEMM_CHECK ( "s8s8s32os32", @@ -69,8 +84,13 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32) m, n, k, a, lda, mem_format_a, b, ldb, mem_format_b, - c, ldc + c, ldc, + err_no ); + if ( err_no != 0 ) + { + goto err_hndl; + } /* Map BLAS chars to their corresponding BLIS enumerated type value. */ bli_param_map_netlib_to_blis_trans( transa, &blis_transa ); @@ -85,7 +105,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32) { bli_print_msg("Column major inputs not supported with Post-ops.", __FILE__, __LINE__); - return; + goto err_hndl; } inc_t rs_a = lda; @@ -120,7 +140,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32) { bli_print_msg(" Reordering of A matrix is not supported " "in row major case.", __FILE__, __LINE__); - return; + goto err_hndl; } // Inputs swapped in column major, A becomes B from kernel point of view. // Reorder is not supported for column major matrices. @@ -129,7 +149,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32) { bli_print_msg(" Reordering of column major matrices " "is not supported.", __FILE__, __LINE__); - return; + goto err_hndl; } // From 5-loop function point of view @@ -169,7 +189,10 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32) m, n ); - if( err != BLIS_SUCCESS ) return; + if( err != BLIS_SUCCESS ) + { + goto err_hndl; + } // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. @@ -228,4 +251,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32) post_op_list, S32); } #endif + +err_hndl:; + LPGEMM_STOP_LOGGER(); } diff --git a/addon/aocl_gemm/aocl_gemm_s8s8s32os8.c b/addon/aocl_gemm/aocl_gemm_s8s8s32os8.c index ffeef5ba1..1c092ed08 100644 --- a/addon/aocl_gemm/aocl_gemm_s8s8s32os8.c +++ b/addon/aocl_gemm/aocl_gemm_s8s8s32os8.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2023 - 2025, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -41,9 +41,23 @@ #include "lpgemm_5loop_interface_apis.h" #include "lpgemm_config.h" #include "lpgemm_utils_s8.h" +#include "lpgemm_logger.h" AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8) { + LPGEMM_START_LOGGER(); + LPGEMM_WRITE_LOGGER \ + ( + "s8s8s32os8", \ + order, transa, transb, \ + m, n, k, \ + ( ( float ) alpha ), \ + lda, mem_format_a, \ + ldb, mem_format_b, \ + ( ( float ) beta ), \ + ldc, post_op_unparsed \ + ); + trans_t blis_transa; trans_t blis_transb; @@ -52,7 +66,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8) { bli_print_msg(" AVX512_VNNI ISA not supported by processor, " "cannot perform s8s8s32 gemm.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } /* Initialize BLIS. */ @@ -62,6 +76,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8) aocl_lpgemm_init_global_cntx(); // check for validity of params. + int err_no = 0; AOCL_GEMM_CHECK ( "s8s8s32os8", @@ -69,8 +84,13 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8) m, n, k, a, lda, mem_format_a, b, ldb, mem_format_b, - c, ldc + c, ldc, + err_no ); + if ( err_no != 0 ) + { + goto err_hndl; + } /* Map BLAS chars to their corresponding BLIS enumerated type value. */ bli_param_map_netlib_to_blis_trans( transa, &blis_transa ); @@ -85,7 +105,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8) { bli_print_msg("Column major inputs not supported with Post-ops.", __FILE__, __LINE__); - return; + goto err_hndl; } // The strides are set assuming a row major kernel. @@ -120,7 +140,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8) { bli_print_msg(" Reordering of A matrix is not supported in " " row major case.", __FILE__, __LINE__); - return; + goto err_hndl; } // Inputs swapped in column major, A becomes B from kernel point of view. // Reorder is not supported for column major matrices. @@ -129,7 +149,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8) { bli_print_msg(" Reordering of column major matrices is " " not supported.", __FILE__, __LINE__); - return; + goto err_hndl; } // From 5-loop function point of view @@ -169,7 +189,10 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8) m, n ); - if( err != BLIS_SUCCESS ) return; + if( err != BLIS_SUCCESS ) + { + goto err_hndl; + } // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. @@ -235,4 +258,7 @@ AOCL_GEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8) ); } #endif + +err_hndl:; + LPGEMM_STOP_LOGGER(); } diff --git a/addon/aocl_gemm/aocl_gemm_u8s8s16os16.c b/addon/aocl_gemm/aocl_gemm_u8s8s16os16.c index d6b179f29..867080522 100644 --- a/addon/aocl_gemm/aocl_gemm_u8s8s16os16.c +++ b/addon/aocl_gemm/aocl_gemm_u8s8s16os16.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -41,9 +41,23 @@ #include "lpgemm_utils.h" #include "lpgemm_thread_decor_openmp.h" #include "lpgemm_post_ops.h" +#include "lpgemm_logger.h" AOCL_GEMM_MATMUL(uint8_t,int8_t,int16_t,int16_t,u8s8s16os16) { + LPGEMM_START_LOGGER(); + LPGEMM_WRITE_LOGGER \ + ( + "u8s8s16os16", \ + order, transa, transb, \ + m, n, k, \ + ( ( float ) alpha ), \ + lda, mem_format_a, \ + ldb, mem_format_b, \ + ( ( float ) beta ), \ + ldc, post_op_unparsed \ + ); + trans_t blis_transa; trans_t blis_transb; @@ -52,7 +66,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int16_t,int16_t,u8s8s16os16) { bli_print_msg(" AVX2 ISA not supported by processor, " "cannot perform u8s8s16 gemm.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } /* Initialize BLIS. */ @@ -62,6 +76,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int16_t,int16_t,u8s8s16os16) aocl_lpgemm_init_global_cntx(); // check for validity of params. + int err_no = 0; AOCL_GEMM_CHECK ( "u8s8s16os16", @@ -69,8 +84,13 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int16_t,int16_t,u8s8s16os16) m, n, k, a, lda, mem_format_a, b, ldb, mem_format_b, - c, ldc + c, ldc, + err_no ); + if ( err_no != 0 ) + { + goto err_hndl; + } /* Map BLAS chars to their corresponding BLIS enumerated type value. */ bli_param_map_netlib_to_blis_trans(transa, &blis_transa); @@ -81,13 +101,13 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int16_t,int16_t,u8s8s16os16) if ( ( blis_transb != BLIS_NO_TRANSPOSE ) ) { bli_print_msg(" Transpose of B matrices is not supported.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } if ( ( order != 'r' ) && ( order != 'R' ) ) { bli_print_msg(" Operation only supports row-major matrices.", __FILE__, __LINE__ ); - return; // Only row major supported. + goto err_hndl; } inc_t rs_a = lda; @@ -126,7 +146,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int16_t,int16_t,u8s8s16os16) if ( !( bli_is_trans( blis_transa ) ) && ( mtag_a != UNPACKED ) ) { bli_print_msg(" A matrix needs to be unpacked.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } // Convert post op struct to post op linked list format. @@ -138,7 +158,10 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int16_t,int16_t,u8s8s16os16) m, n ); - if( err != BLIS_SUCCESS ) return; + if( err != BLIS_SUCCESS ) + { + goto err_hndl; + } // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. @@ -171,4 +194,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int16_t,int16_t,u8s8s16os16) post_op_list, S16 ); #endif + +err_hndl:; + LPGEMM_STOP_LOGGER(); } diff --git a/addon/aocl_gemm/aocl_gemm_u8s8s16os8.c b/addon/aocl_gemm/aocl_gemm_u8s8s16os8.c index 3c10c7530..38ff439c6 100644 --- a/addon/aocl_gemm/aocl_gemm_u8s8s16os8.c +++ b/addon/aocl_gemm/aocl_gemm_u8s8s16os8.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -41,9 +41,22 @@ #include "lpgemm_utils.h" #include "lpgemm_thread_decor_openmp.h" #include "lpgemm_post_ops.h" +#include "lpgemm_logger.h" AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int16_t,u8s8s16os8) { + LPGEMM_START_LOGGER(); + LPGEMM_WRITE_LOGGER \ + ( + "u8s8s16os8", \ + order, transa, transb, \ + m, n, k, \ + ( ( float ) alpha ), \ + lda, mem_format_a, \ + ldb, mem_format_b, \ + ( ( float ) beta ), \ + ldc, post_op_unparsed \ + ); trans_t blis_transa; trans_t blis_transb; @@ -52,7 +65,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int16_t,u8s8s16os8) { bli_print_msg(" AVX2 ISA not supported by processor, " "cannot perform u8s8s16 gemm.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } /* Initialize BLIS. */ @@ -62,6 +75,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int16_t,u8s8s16os8) aocl_lpgemm_init_global_cntx(); // check for validity of params. + int err_no = 0; AOCL_GEMM_CHECK ( "u8s8s16os8", @@ -69,8 +83,13 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int16_t,u8s8s16os8) m, n, k, a, lda, mem_format_a, b, ldb, mem_format_b, - c, ldc + c, ldc, + err_no ); + if ( err_no != 0 ) + { + goto err_hndl; + } /* Map BLAS chars to their corresponding BLIS enumerated type value. */ bli_param_map_netlib_to_blis_trans(transa, &blis_transa); @@ -81,13 +100,13 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int16_t,u8s8s16os8) if ( ( blis_transb != BLIS_NO_TRANSPOSE ) ) { bli_print_msg(" Transpose of B matrices is not supported.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } if ( ( order != 'r' ) && ( order != 'R' ) ) { bli_print_msg(" Operation only supports row-major matrices.", __FILE__, __LINE__ ); - return; // Only row major supported. + goto err_hndl; } inc_t rs_a = lda; @@ -126,7 +145,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int16_t,u8s8s16os8) if ( !( bli_is_trans( blis_transa ) ) && ( mtag_a != UNPACKED ) ) { bli_print_msg(" A matrix needs to be unpacked.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } // Convert post op struct to post op linked list format. @@ -138,7 +157,10 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int16_t,u8s8s16os8) m, n ); - if( err != BLIS_SUCCESS ) return; + if( err != BLIS_SUCCESS ) + { + goto err_hndl; + } // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. @@ -171,4 +193,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int16_t,u8s8s16os8) post_op_list, S8 ); #endif + +err_hndl:; + LPGEMM_STOP_LOGGER(); } diff --git a/addon/aocl_gemm/aocl_gemm_u8s8s16ou8.c b/addon/aocl_gemm/aocl_gemm_u8s8s16ou8.c index f29028d57..c6a6e93b5 100644 --- a/addon/aocl_gemm/aocl_gemm_u8s8s16ou8.c +++ b/addon/aocl_gemm/aocl_gemm_u8s8s16ou8.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2023 - 2025, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -41,9 +41,22 @@ #include "lpgemm_utils.h" #include "lpgemm_thread_decor_openmp.h" #include "lpgemm_post_ops.h" +#include "lpgemm_logger.h" AOCL_GEMM_MATMUL(uint8_t,int8_t,uint8_t,int16_t,u8s8s16ou8) { + LPGEMM_START_LOGGER(); + LPGEMM_WRITE_LOGGER \ + ( + "u8s8s16ou8", \ + order, transa, transb, \ + m, n, k, \ + ( ( float ) alpha ), \ + lda, mem_format_a, \ + ldb, mem_format_b, \ + ( ( float ) beta ), \ + ldc, post_op_unparsed \ + ); trans_t blis_transa; trans_t blis_transb; @@ -52,7 +65,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,uint8_t,int16_t,u8s8s16ou8) { bli_print_msg(" AVX2 ISA not supported by processor, " "cannot perform u8s8s16 gemm.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } /* Initialize BLIS. */ @@ -62,6 +75,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,uint8_t,int16_t,u8s8s16ou8) aocl_lpgemm_init_global_cntx(); // check for validity of params. + int err_no = 0; AOCL_GEMM_CHECK ( "u8s8s16ou8", @@ -69,8 +83,13 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,uint8_t,int16_t,u8s8s16ou8) m, n, k, a, lda, mem_format_a, b, ldb, mem_format_b, - c, ldc + c, ldc, + err_no ); + if ( err_no != 0 ) + { + goto err_hndl; + } /* Map BLAS chars to their corresponding BLIS enumerated type value. */ bli_param_map_netlib_to_blis_trans(transa, &blis_transa); @@ -81,13 +100,13 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,uint8_t,int16_t,u8s8s16ou8) if ( ( blis_transb != BLIS_NO_TRANSPOSE ) ) { bli_print_msg(" Transpose of B matrices is not supported.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } if ( ( order != 'r' ) && ( order != 'R' ) ) { bli_print_msg(" Operation only supports row-major matrices.", __FILE__, __LINE__ ); - return; // Only row major supported. + goto err_hndl; } inc_t rs_a = lda; @@ -126,7 +145,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,uint8_t,int16_t,u8s8s16ou8) if ( !( bli_is_trans( blis_transa ) ) && ( mtag_a != UNPACKED ) ) { bli_print_msg(" A matrix needs to be unpacked.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } // Convert post op struct to post op linked list format. @@ -138,7 +157,10 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,uint8_t,int16_t,u8s8s16ou8) m, n ); - if( err != BLIS_SUCCESS ) return; + if( err != BLIS_SUCCESS ) + { + goto err_hndl; + } // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. @@ -171,4 +193,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,uint8_t,int16_t,u8s8s16ou8) post_op_list, U8 ); #endif + +err_hndl:; + LPGEMM_STOP_LOGGER(); } diff --git a/addon/aocl_gemm/aocl_gemm_u8s8s32os32.c b/addon/aocl_gemm/aocl_gemm_u8s8s32os32.c index 56c1b06db..5902ef445 100644 --- a/addon/aocl_gemm/aocl_gemm_u8s8s32os32.c +++ b/addon/aocl_gemm/aocl_gemm_u8s8s32os32.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -41,9 +41,22 @@ #include "lpgemm_5loop_interface_apis.h" #include "lpgemm_config.h" #include "lpgemm_utils.h" +#include "lpgemm_logger.h" AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32) { + LPGEMM_START_LOGGER(); + LPGEMM_WRITE_LOGGER \ + ( + "u8s8s32os32", \ + order, transa, transb, \ + m, n, k, \ + ( ( float ) alpha ), \ + lda, mem_format_a, \ + ldb, mem_format_b, \ + ( ( float ) beta ), \ + ldc, post_op_unparsed \ + ); trans_t blis_transa; trans_t blis_transb; @@ -52,7 +65,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32) { bli_print_msg(" AVX512_VNNI ISA not supported by processor, " "cannot perform u8s8s32 gemm.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } /* Initialize BLIS. */ @@ -62,6 +75,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32) aocl_lpgemm_init_global_cntx(); // check for validity of params. + int err_no = 0; AOCL_GEMM_CHECK ( "u8s8s32os32", @@ -69,8 +83,13 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32) m, n, k, a, lda, mem_format_a, b, ldb, mem_format_b, - c, ldc + c, ldc, + err_no ); + if ( err_no != 0 ) + { + goto err_hndl; + } /* Map BLAS chars to their corresponding BLIS enumerated type value. */ bli_param_map_netlib_to_blis_trans( transa, &blis_transa ); @@ -85,7 +104,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32) { bli_print_msg("Column major inputs not supported with Post-ops.", __FILE__, __LINE__); - return; + goto err_hndl; } inc_t rs_a = lda; @@ -121,7 +140,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32) bli_print_msg(" Reordering of A matrix is not supported " "in row major case.", __FILE__, __LINE__); - return; + goto err_hndl; } // Inputs swapped in column major, A becomes B from kernel point of view. // Reorder is not supported for column major matrices. @@ -131,7 +150,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32) bli_print_msg(" Reordering of column major matrices " "is not supported.", __FILE__, __LINE__); - return; + goto err_hndl; } // From 5-loop function point of view @@ -171,7 +190,10 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32) m, n ); - if( err != BLIS_SUCCESS ) return; + if( err != BLIS_SUCCESS ) + { + goto err_hndl; + } // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. @@ -230,4 +252,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int32_t,int32_t,u8s8s32os32) post_op_list, S32); } #endif + +err_hndl:; + LPGEMM_STOP_LOGGER(); } diff --git a/addon/aocl_gemm/aocl_gemm_u8s8s32os8.c b/addon/aocl_gemm/aocl_gemm_u8s8s32os8.c index 13184b593..8756713f1 100644 --- a/addon/aocl_gemm/aocl_gemm_u8s8s32os8.c +++ b/addon/aocl_gemm/aocl_gemm_u8s8s32os8.c @@ -4,7 +4,7 @@ An object-based framework for developing high-performance BLAS-like libraries. - Copyright (C) 2022 - 2024, Advanced Micro Devices, Inc. All rights reserved. + Copyright (C) 2022 - 2025, Advanced Micro Devices, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -41,9 +41,22 @@ #include "lpgemm_5loop_interface_apis.h" #include "lpgemm_config.h" #include "lpgemm_utils.h" +#include "lpgemm_logger.h" AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8) { + LPGEMM_START_LOGGER(); + LPGEMM_WRITE_LOGGER \ + ( + "u8s8s32os8", \ + order, transa, transb, \ + m, n, k, \ + ( ( float ) alpha ), \ + lda, mem_format_a, \ + ldb, mem_format_b, \ + ( ( float ) beta ), \ + ldc, post_op_unparsed \ + ); trans_t blis_transa; trans_t blis_transb; @@ -52,7 +65,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8) { bli_print_msg(" AVX512_VNNI ISA not supported by processor, " "cannot perform u8s8s32 gemm.", __FILE__, __LINE__ ); - return; // Error. + goto err_hndl; } /* Initialize BLIS. */ @@ -62,6 +75,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8) aocl_lpgemm_init_global_cntx(); // check for validity of params. + int err_no = 0; AOCL_GEMM_CHECK ( "u8s8s32os8", @@ -69,8 +83,13 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8) m, n, k, a, lda, mem_format_a, b, ldb, mem_format_b, - c, ldc + c, ldc, + err_no ); + if ( err_no != 0 ) + { + goto err_hndl; + } /* Map BLAS chars to their corresponding BLIS enumerated type value. */ bli_param_map_netlib_to_blis_trans(transa, &blis_transa); @@ -85,7 +104,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8) { bli_print_msg("Column major inputs not supported with Post-ops.", __FILE__, __LINE__); - return; + goto err_hndl; } inc_t rs_a = lda; @@ -121,7 +140,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8) bli_print_msg(" Reordering of A matrix is not supported " "in row major case.", __FILE__, __LINE__); - return; + goto err_hndl; } // Inputs swapped in column major, A becomes B from kernel point of view. // Reorder is not supported for column major matrices. @@ -131,7 +150,7 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8) bli_print_msg(" Reordering of column major matrices " "is not supported.", __FILE__, __LINE__); - return; + goto err_hndl; } // From 5-loop function point of view @@ -171,7 +190,10 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8) m, n ); - if( err != BLIS_SUCCESS ) return; + if( err != BLIS_SUCCESS ) + { + goto err_hndl; + } // Initialize a local runtime with global settings if necessary. Note // that in the case that a runtime is passed in, we make a local copy. @@ -231,4 +253,6 @@ AOCL_GEMM_MATMUL(uint8_t,int8_t,int8_t,int32_t,u8s8s32os8) } #endif +err_hndl:; + LPGEMM_STOP_LOGGER(); } diff --git a/addon/aocl_gemm/config/lpgemm_config.c b/addon/aocl_gemm/config/lpgemm_config.c index ef6a3c97b..d744d7a62 100644 --- a/addon/aocl_gemm/config/lpgemm_config.c +++ b/addon/aocl_gemm/config/lpgemm_config.c @@ -46,6 +46,7 @@ #include "lpgemm_packb_s8.h" #include "lpgemm_packb_s8s16.h" #include "lpgemm_pack_f32.h" +#include "lpgemm_logger.h" static lpgemm_cntx_t global_cntx_t_list[AOCL_OPERATION_TYPE_LEN] \ __attribute__((aligned(64))); //Only one op type supported now. diff --git a/addon/aocl_gemm/frame/logging/lpgemm_logger.c b/addon/aocl_gemm/frame/logging/lpgemm_logger.c new file mode 100644 index 000000000..419cd9a85 --- /dev/null +++ b/addon/aocl_gemm/frame/logging/lpgemm_logger.c @@ -0,0 +1,339 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" +#include "lpgemm_sys.h" +#include "lpgemm_logger.h" +#include "lpgemm_post_ops.h" +#include "lpgemm_types.h" +#include + +#ifdef AOCL_LPGEMM_LOGGER_SUPPORT + +static bli_pthread_once_t once_check_lpgemm_logger_init = BLIS_PTHREAD_ONCE_INIT; + +static bool lpgemm_logger_enabled = FALSE; + +FILE* lpgemm_start_logger_fn(void) +{ + lpgemm_init_logger(); + + FILE* fd = NULL; + + if ( lpgemm_logger_enabled == TRUE ) + { + char log_file[255] = {0}; + sprintf( log_file, "%s_P%lu_T%lu%s", + AOCL_LPGEMM_LOG_FILE_PRFX, + lpgemm_getpid(), lpgemm_gettid(), + AOCL_LPGEMM_LOG_FILE_EXT ); + + fd = fopen( log_file, "a" ); + } + + return fd; +} + +void lpgemm_stop_logger_fn( FILE* fd ) +{ + if ( ( lpgemm_logger_enabled == TRUE ) && ( fd != NULL ) ) + { + fflush( fd ); + fclose( fd ); + } +} + +#define LPGEMM_POST_OPS_STR_COPY(ops_str, ops_str_len, p_str) \ + do \ + { \ + char* c_ops_str = p_str; \ + size_t c_ops_str_len = strlen( c_ops_str ); \ + strcpy( ops_str + ops_str_len, c_ops_str ); \ + ops_str_len += c_ops_str_len; \ + } while ( 0 ); \ + +static void lpgemm_get_pre_ops_str( aocl_post_op* post_ops, char* ops_str ) +{ + if ( post_ops == NULL ) + { + strcpy( ops_str, "none" ); + return; + } + + aocl_pre_op* pre_ops = post_ops->pre_ops; + if ( ( pre_ops == NULL ) || ( pre_ops->seq_length <= 0 ) ) + { + strcpy( ops_str, "none" ); + return; + } + if ( ( pre_ops->seq_length > AOCL_MAX_POST_OPS ) ) + { + strcpy( ops_str, "ops over-limit" ); + return; + } + + size_t ops_str_len = 0; + char* delim_str = "#"; + size_t delim_str_len = strlen( delim_str ); + + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "group_sz=" ); + int written = sprintf( ( ops_str + ops_str_len ), "%ld", pre_ops->group_size ); + if ( written > 0 ) + { + ops_str += written; + } + strcpy( ops_str + ops_str_len, delim_str ); + ops_str_len += delim_str_len; + + for (dim_t i = 0; i < pre_ops->seq_length; ++i) + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "scale=" ); + if ( ( pre_ops->b_scl ) != NULL ) + { + if ( ( pre_ops->b_scl + i )->scale_factor_len == 1 ) + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "scalar_scale_factor," ); + } + else + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "vector_scale_factor," ); + } + } + + if ( ( pre_ops->b_zp ) != NULL ) + { + if ( ( pre_ops->b_zp + i )->zero_point_len == 1 ) + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "scalar_zero_point," ); + } + else + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "vector_zero_point," ); + } + } + + strcpy( ops_str + ops_str_len, delim_str ); + ops_str_len += delim_str_len; + } +} + +static void lpgemm_get_post_ops_str( aocl_post_op* post_ops, char* ops_str ) +{ + if ( ( post_ops == NULL ) || ( post_ops->seq_length <= 0 ) ) + { + strcpy( ops_str, "none" ); + return; + } + if ( ( post_ops->seq_length > AOCL_MAX_POST_OPS ) ) + { + strcpy( ops_str, "ops over-limit" ); + return; + } + + size_t ops_str_len = 0; + dim_t e_i = 0; // Multiple eltwise supported. + dim_t s_i = 0; // Multiple sum/scale supported. + char* delim_str = "#"; + size_t delim_str_len = strlen( delim_str ); + for ( dim_t i = 0; i < post_ops->seq_length; ++i ) + { + // Dispatcher code + switch ( *( post_ops->seq_vector + i ) ) + { + case ELTWISE: + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "eltwise="); + // Eltwise algo dispatcher. + switch ( ( post_ops->eltwise + e_i )->algo.algo_type ) + { + case RELU: + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "relu"); + } + break; + case PRELU: + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "prelu" ); + } + break; + case GELU_TANH: + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "gelu_tanh" ); + } + break; + case GELU_ERF: + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "gelu_erf" ); + } + break; + case CLIP: + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "clip" ); + } + break; + case SWISH: + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "swish" ); + } + break; + case TANH: + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "tanh" ); + } + break; + case SIGMOID: + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "sigmoid" ); + } + break; + default: + break; + } + e_i += 1; + } + break; + case BIAS: + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "bias" ); + } + break; + case SCALE: + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "scale=" ); + if ( ( post_ops->sum + s_i )->scale_factor_len == 1 ) + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "scalar_scale_factor," ); + } + else + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "vector_scale_factor," ); + } + + if ( ( post_ops->sum + s_i )->zero_point_len == 1 ) + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "scalar_zero_point," ); + } + else + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "vector_zero_point," ); + } + + s_i += 1; + } + break; + case MATRIX_ADD: + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "mat_add" ); + } + break; + case MATRIX_MUL: + { + LPGEMM_POST_OPS_STR_COPY( ops_str, ops_str_len, "mat_mul" ); + } + break; + default: + break; + } + + strcpy( ops_str + ops_str_len, delim_str ); + ops_str_len += delim_str_len; + } +} + +void lpgemm_write_logger_gemm_fn + ( + FILE* fd, + char* op_type, + const char order, + const char transa, + const char transb, + const dim_t m, + const dim_t n, + const dim_t k, + const float alpha, + const dim_t lda, + const char mem_format_a, + const dim_t ldb, + const char mem_format_b, + const float beta, + const dim_t ldc, + aocl_post_op* post_op_unparsed + ) +{ + if ( ( lpgemm_logger_enabled == TRUE ) && ( fd != NULL ) ) + { + char pre_ops_str[1024] = {0}; + lpgemm_get_pre_ops_str( post_op_unparsed, pre_ops_str ); + + char post_ops_str[2048] = {0}; + lpgemm_get_post_ops_str( post_op_unparsed, post_ops_str ); + + fprintf( fd, "%c %c %c %c %c %ld %ld %ld %ld %ld %ld "\ + "%s:pre_ops=[%s]:post_ops=[%s] %f %f ", + order, transa, transb, mem_format_a, mem_format_b, + m, n, k, lda, ldb, ldc, + op_type, pre_ops_str, post_ops_str, + alpha, beta ); + } +} + +void lpgemm_write_logger_time_break_fn( FILE* fd, double stime ) +{ + if ( ( lpgemm_logger_enabled == TRUE ) && ( fd != NULL ) ) + { + fprintf( fd, "%f \n", stime ); + } +} + +void _lpgemm_init_logger() +{ + lpgemm_logger_enabled = + bli_env_get_var( "AOCL_ENABLE_LPGEMM_LOGGER", FALSE ); +} + +void lpgemm_init_logger() +{ + bli_pthread_once + ( + &once_check_lpgemm_logger_init, + _lpgemm_init_logger + ); +} + +#else + +void lpgemm_init_logger() +{} + +#endif diff --git a/addon/aocl_gemm/frame/logging/lpgemm_logger.h b/addon/aocl_gemm/frame/logging/lpgemm_logger.h new file mode 100644 index 000000000..d908059b3 --- /dev/null +++ b/addon/aocl_gemm/frame/logging/lpgemm_logger.h @@ -0,0 +1,96 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef LPGEMM_LOGGER_H +#define LPGEMM_LOGGER_H + +#ifdef AOCL_LPGEMM_LOGGER_SUPPORT + +#define AOCL_LPGEMM_LOG_FILE_PRFX "aocl_gemm_log" +#define AOCL_LPGEMM_LOG_FILE_EXT ".txt" + +FILE* lpgemm_start_logger_fn(void); +void lpgemm_stop_logger_fn( FILE* fd ); +void lpgemm_write_logger_gemm_fn + ( + FILE* fd, + char* op_type, + const char order, + const char transa, + const char transb, + const dim_t m, + const dim_t n, + const dim_t k, + const float alpha, + const dim_t lda, + const char mem_format_a, + const dim_t ldb, + const char mem_format_b, + const float beta, + const dim_t ldc, + aocl_post_op* post_op_unparsed + ); +void lpgemm_write_logger_time_break_fn( FILE* fd, double stime ); + +#define LPGEMM_START_LOGGER() \ + FILE* fd = lpgemm_start_logger_fn(); \ + double aocl_lpgemm_logger_start_time = bli_clock(); \ + +#define LPGEMM_STOP_LOGGER() \ + double aocl_lpgemm_logger_stop_time = DBL_MAX; \ + aocl_lpgemm_logger_stop_time = \ + bli_clock_min_diff \ + ( \ + aocl_lpgemm_logger_stop_time, \ + aocl_lpgemm_logger_start_time \ + ); \ + lpgemm_write_logger_time_break_fn( fd, aocl_lpgemm_logger_stop_time ); \ + lpgemm_stop_logger_fn( fd ); \ + +#define LPGEMM_WRITE_LOGGER(...) \ + lpgemm_write_logger_gemm_fn( fd, __VA_ARGS__ ); \ + +#else + +#define LPGEMM_START_LOGGER(...) + +#define LPGEMM_STOP_LOGGER(...) + +#define LPGEMM_WRITE_LOGGER(...) + +#endif + +void lpgemm_init_logger(); + +#endif //LPGEMM_LOGGER_H diff --git a/addon/aocl_gemm/frame/sys_utils/lpgemm_sys.c b/addon/aocl_gemm/frame/sys_utils/lpgemm_sys.c new file mode 100644 index 000000000..3e947a28c --- /dev/null +++ b/addon/aocl_gemm/frame/sys_utils/lpgemm_sys.c @@ -0,0 +1,67 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" +#include "lpgemm_sys.h" + +#if defined(__linux__) +#include +#include +#endif + +uint64_t lpgemm_gettid( void ) +{ +#ifdef BLIS_ENABLE_OPENMP + return ( uint64_t )omp_get_thread_num(); +#else + #ifdef BLIS_ENABLE_PTHREADS + #ifndef _WIN32 + return ( uint64_t ) pthread_self(); + #else + return 0; + #endif + #else + return 0; + #endif +#endif +} + +uint64_t lpgemm_getpid( void ) +{ +#if defined(__linux__) + return ( uint64_t ) getpid(); +#else + return 0; +#endif +} diff --git a/addon/aocl_gemm/frame/sys_utils/lpgemm_sys.h b/addon/aocl_gemm/frame/sys_utils/lpgemm_sys.h new file mode 100644 index 000000000..2e9e55c10 --- /dev/null +++ b/addon/aocl_gemm/frame/sys_utils/lpgemm_sys.h @@ -0,0 +1,41 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2025, Advanced Micro Devices, Inc. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef LPGEMM_SYS_UTILS_H +#define LPGEMM_SYS_UTILS_H + +uint64_t lpgemm_gettid( void ); +uint64_t lpgemm_getpid( void ); + +#endif //LPGEMM_SYS_UTILS_H diff --git a/bench/bench_aocl_gemm/bench_input.txt b/bench/bench_aocl_gemm/bench_input.txt index 4e3955d46..d758cd6f2 100644 --- a/bench/bench_aocl_gemm/bench_input.txt +++ b/bench/bench_aocl_gemm/bench_input.txt @@ -1,5 +1,4 @@ c n t n n 32 128 2 32 128 32 bf16bf16f32of32:bias=na,swish -# r n n n r 6 1 4 4 16 16 bf16s4f32of32:pre_op_scale=scalar,pre_op_scale_type=bf16,group_size=2 r n n n r 6 1 4 4 16 16 bf16s4f32of32:pre_op_zp=vector,pre_op_scale=scalar,pre_op_scale_type=bf16,group_size=2 r n n n r 6 1 4 4 16 16 bf16s4f32of32:pre_op_zp=scalar,pre_op_scale=scalar,pre_op_scale_type=bf16,group_size=2 diff --git a/bench/bench_aocl_gemm/bench_lpgemm.c b/bench/bench_aocl_gemm/bench_lpgemm.c index a441f5bae..e39a5f789 100644 --- a/bench/bench_aocl_gemm/bench_lpgemm.c +++ b/bench/bench_aocl_gemm/bench_lpgemm.c @@ -1046,7 +1046,6 @@ GEN_MAT_MUL_ACC_CHK_DRV_FUNC(int8_t,int8_t,int8_t,int16_t,float,s8s8s16os8,s8s8s GEN_MAT_MUL_ACC_CHK_DRV_FUNC(bfloat16,int8_t,float,float,float,bf16s4f32of32,bf16bf16f32obf16) GEN_MAT_MUL_ACC_CHK_DRV_FUNC(bfloat16,int8_t,bfloat16,float,float,bf16s4f32obf16,bf16bf16f32obf16) - GEN_MAT_MUL_POST_OPS_CREATOR(int8_t,int16_t,float,int16_t,u8s8s16os16) GEN_MAT_MUL_POST_OPS_CREATOR(int8_t,int32_t,float,int32_t,u8s8s32os32) GEN_MAT_MUL_POST_OPS_CREATOR(bfloat16,float,float,bfloat16,bf16bf16f32of32) @@ -1431,6 +1430,8 @@ int main( int argc, char** argv ) strncpy( post_ops_str_dest, post_ops_str, POST_OPS_STR_LEN ); global_dscale_out = 'n'; global_pre_op = 'n'; + DSCALE_CLIP_MIN = INT_MIN; + DSCALE_CLIP_MAX = INT_MAX; GEN_FUNC_NAME(mat_mul_bench_main_,u8s8s32os32) ( fin, fout, stor_order, transa, transb, op_a, op_b, @@ -1462,6 +1463,8 @@ int main( int argc, char** argv ) strncpy( post_ops_str_dest, post_ops_str, POST_OPS_STR_LEN ); global_dscale_out = 'n'; global_pre_op = 'n'; + DSCALE_CLIP_MIN = INT_MIN; + DSCALE_CLIP_MAX = INT_MAX; if ( ( op_b != 'r' ) && ( op_b != 'R' ) ) { @@ -1492,12 +1495,15 @@ int main( int argc, char** argv ) post_ops_str_dest, FALSE ); } +#if 0 if ( ( strcmp( gemm_type_str, "u8s8s16os16" ) == 0 ) || ( strcmp( gemm_type_str, "*" ) == 0 ) ) { strncpy( post_ops_str_dest, post_ops_str, POST_OPS_STR_LEN ); global_dscale_out = 'n'; global_pre_op = 'n'; + DSCALE_CLIP_MIN = SHRT_MIN; + DSCALE_CLIP_MAX = SHRT_MAX; GEN_FUNC_NAME(mat_mul_bench_main_,u8s8s16os16) ( fin, fout, stor_order, transa, transb, op_a, op_b, @@ -1535,6 +1541,7 @@ int main( int argc, char** argv ) post_ops_str_dest, FALSE ); } +#endif if ( ( strcmp( gemm_type_str, "bf16bf16f32of32" ) == 0 ) || ( strcmp( gemm_type_str, "*" ) == 0 ) ) { @@ -1609,6 +1616,8 @@ int main( int argc, char** argv ) strncpy( post_ops_str_dest, post_ops_str, POST_OPS_STR_LEN ); global_dscale_out = 'n'; global_pre_op = 'n'; + DSCALE_CLIP_MIN = INT_MIN; + DSCALE_CLIP_MAX = INT_MAX; GEN_FUNC_NAME(mat_mul_bench_main_,s8s8s32os32) ( fin, fout, stor_order, transa, transb, op_a, op_b, @@ -1631,12 +1640,15 @@ int main( int argc, char** argv ) post_ops_str_dest, FALSE ); } +#if 0 if ( ( strcmp( gemm_type_str, "s8s8s16os16" ) == 0 ) || ( strcmp( gemm_type_str, "*" ) == 0 ) ) { strncpy( post_ops_str_dest, post_ops_str, POST_OPS_STR_LEN ); global_dscale_out = 'n'; global_pre_op = 'n'; + DSCALE_CLIP_MIN = SHRT_MIN; + DSCALE_CLIP_MAX = SHRT_MAX; GEN_FUNC_NAME(mat_mul_bench_main_,s8s8s16os16) ( fin, fout, stor_order, transa, transb, op_a, op_b, @@ -1659,6 +1671,7 @@ int main( int argc, char** argv ) post_ops_str_dest, FALSE ); } +#endif } } diff --git a/bench/bench_aocl_gemm/bench_lpgemm_helpers.h b/bench/bench_aocl_gemm/bench_lpgemm_helpers.h index 23efacb14..207c0a16b 100644 --- a/bench/bench_aocl_gemm/bench_lpgemm_helpers.h +++ b/bench/bench_aocl_gemm/bench_lpgemm_helpers.h @@ -43,13 +43,14 @@ #include #include #include +#include #include "blis.h" // Used to clip downscaled output, will be set in the main loop based // on the accumulation and C data type. -int64_t DSCALE_CLIP_MIN = 0; -int64_t DSCALE_CLIP_MAX = 0; +int64_t DSCALE_CLIP_MIN = INT_MIN; +int64_t DSCALE_CLIP_MAX = INT_MAX; // Mode can be one of the follwoing: // 1. p - performance, used for benchmarks. @@ -434,16 +435,16 @@ static inline void mat_mul_get_output_type_valfloatbfloat16 float* temp_accum ) { - /* Fix for rounding bias. */ - uint32_t inter_temp; - memcpy( &inter_temp, temp_accum, sizeof( float ) ); + /* Fix for rounding bias. */ + uint32_t inter_temp; + memcpy( &inter_temp, temp_accum, sizeof( float ) ); - /* Check if 16th bit is set */ - uint32_t tlsb = ( inter_temp & ( uint32_t )0x00010000 ) > 16; + /* Check if 16th bit is set */ + uint32_t tlsb = ( inter_temp & ( uint32_t )0x00010000 ) > 16; - /* Adding rounding bias. */ - uint32_t rounded = inter_temp + ( uint32_t )0x00007FFF + tlsb; - memcpy( temp_accum, &rounded, sizeof( float ) ); + /* Adding rounding bias. */ + uint32_t rounded = inter_temp + ( uint32_t )0x00007FFF + tlsb; + memcpy( temp_accum, &rounded, sizeof( float ) ); float_to_bf16( temp_accum, out_temp_accum ); } @@ -772,6 +773,16 @@ static inline aocl_post_op* lpgemm_create_post_ops_struct_ ## BLAS_SFX \ is_scalar_scale = TRUE; \ } \ } \ + else if ( strcmp( ops_tok, "zp" ) == 0 ) \ + { \ + ops_tok = strtok( NULL, ", " ); \ + str_tolower( ops_tok ); \ + if ( ( strcmp( ops_tok, "scalar" ) == 0 ) || \ + ( strcmp( ops_tok, "s" ) == 0 ) ) \ + { \ + is_scalar_zp = TRUE; \ + } \ + } \ else if ( strcmp( ops_tok, "matrix_add" ) == 0 ) \ { \ post_ops->seq_vector[cur_op_index] = MATRIX_ADD; \ @@ -812,15 +823,15 @@ static inline aocl_post_op* lpgemm_create_post_ops_struct_ ## BLAS_SFX \ } \ else if ( strcmp( ops_tok, "pre_op_zp" ) == 0 ) \ { \ - ops_tok = strtok( NULL, ", " ); \ + ops_tok = strtok( NULL, ", " ); \ str_tolower( ops_tok ); \ if ( ( strcmp( ops_tok, "scalar" ) == 0 ) || \ ( strcmp( ops_tok, "s" ) == 0 ) ) \ { \ /* set scalar zp */\ zp_vec_length = 1; \ - }else if ( ( strcmp( ops_tok, "vector" ) == 0 ) || \ - ( strcmp( ops_tok, "v" ) == 0 ) ) \ + } \ + else \ { \ /* set vector zp */\ zp_vec_length = n; \ @@ -835,8 +846,8 @@ static inline aocl_post_op* lpgemm_create_post_ops_struct_ ## BLAS_SFX \ { \ /* set scalar scale */\ is_pre_op_scale_scalar = TRUE; \ - }else if ( ( strcmp( ops_tok, "vector" ) == 0 ) || \ - ( strcmp( ops_tok, "v" ) == 0 ) ) \ + } \ + else \ { \ /* set vector scale */\ is_pre_op_scale_scalar = FALSE; \ @@ -1144,10 +1155,11 @@ static inline aocl_post_op* lpgemm_create_post_ops_struct_ ## BLAS_SFX \ if ( post_ops->pre_ops == NULL ) { goto err_handler; } \ \ dim_t num_groups = 1; \ - if(quant_group_size == 0) \ + if (quant_group_size == 0) \ { \ post_ops->pre_ops->group_size = k; \ - }else \ + } \ + else \ { \ post_ops->pre_ops->group_size = quant_group_size; \ if(is_group_quant) \ @@ -1157,6 +1169,10 @@ static inline aocl_post_op* lpgemm_create_post_ops_struct_ ## BLAS_SFX \ } \ \ ( post_ops->pre_ops )->b_zp = NULL; \ + if ( zp_vec_length == 0 ) \ + { \ + zp_vec_length = n; \ + } \ if( zp_vec_length != 0 ) \ { \ ( post_ops->pre_ops )->b_zp = malloc( sizeof( aocl_pre_op_zp ) ); \