Compiler warnings fixes (2)

Fix compiler warning messages in LPGEMM code:
- Removed extraneous parentheses in aocl_batch_gemm_s8s8s32os32.c
- Removed unused variables in lpgemv_{m,n}_kernel_s8_grp_amd512vnni.c
- Changed ERF_UBOUND in math_utils_avx2.h and ERF512_UBOUND in math_utils_avx512.h to match how it is specified in AOCL libm erff.c

AMD-Internal: [CPUPL-6579]
2026-04-20 07:38:53 +00:00 · 2025-09-17 18:28:34 +01:00
parent 31aba514fe
commit e59eabaf58
5 changed files with 10 additions and 24 deletions
--- a/addon/aocl_gemm/aocl_batch_gemm_s8s8s32os32.c
+++ b/addon/aocl_gemm/aocl_batch_gemm_s8s8s32os32.c
@@ -181,7 +181,7 @@ AOCL_BGEMM_MATMUL(int8_t,int8_t,int32_t,int32_t,s8s8s32os32)
 				// has to be transposed to row-major format. In col-maj case, inputs are
 				// swapped and B becomes A from kernel point of view. Hence, if B is packed, 
 				// set B to unpacked and proceed with GEMM.
-				if ((mtag_b[gs_i] == PACK))
+				if ( mtag_b[gs_i] == PACK )
 				{
 					mtag_b[gs_i] = UNPACKED;
 				}
@@ -444,7 +444,7 @@ AOCL_BGEMM_MATMUL(int8_t,int8_t,int8_t,int32_t,s8s8s32os8)
 				// has to be transposed to row-major format. In col-maj case, inputs are
 				// swapped and B becomes A from kernel point of view. Hence, if B is packed, 
 				// set B to unpacked and proceed with GEMM.
-				if ((mtag_b[gs_i] == PACK))
+				if ( mtag_b[gs_i] == PACK )
 				{
 					mtag_b[gs_i] = UNPACKED;
 				}
@@ -694,7 +694,7 @@ AOCL_BGEMM_MATMUL(int8_t,int8_t,float,int32_t,s8s8s32of32)
 				// has to be transposed to row-major format. In col-maj case, inputs are
 				// swapped and B becomes A from kernel point of view. Hence, if B is packed, 
 				// set B to unpacked and proceed with GEMM.
-				if ((mtag_b[gs_i] == PACK))
+				if ( mtag_b[gs_i] == PACK )
 				{
 					mtag_b[gs_i] = UNPACKED;
 				}
@@ -964,7 +964,7 @@ AOCL_BGEMM_MATMUL(int8_t,int8_t,bfloat16,int32_t,s8s8s32obf16)
 				// has to be transposed to row-major format. In col-maj case, inputs are
 				// swapped and B becomes A from kernel point of view. Hence, if B is packed, 
 				// set B to unpacked and proceed with GEMM.
-				if ((mtag_b[gs_i] == PACK))
+				if ( mtag_b[gs_i] == PACK )
 				{
 					mtag_b[gs_i] = UNPACKED;
 				}
@@ -1231,7 +1231,7 @@ AOCL_BGEMM_MATMUL(int8_t,int8_t,uint8_t,int32_t,s8s8s32ou8)
 				// has to be transposed to row-major format. In col-maj case, inputs are
 				// swapped and B becomes A from kernel point of view. Hence, if B is packed, 
 				// set B to unpacked and proceed with GEMM.
-				if ((mtag_b[gs_i] == PACK))
+				if ( mtag_b[gs_i] == PACK )
 				{
 					mtag_b[gs_i] = UNPACKED;
 				}
--- a/kernels/zen/lpgemm/math_utils_avx2.h
+++ b/kernels/zen/lpgemm/math_utils_avx2.h
@@ -4,7 +4,7 @@
   An object-based framework for developing high-performance BLAS-like
   libraries.

-   Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. All rights reserved.
+   Copyright (C) 2023 - 2025, Advanced Micro Devices, Inc. All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
@@ -66,7 +66,7 @@
 #define erf_c14  _mm256_set1_pd((-0x1.155445e2e006723066d72d22ddcp-20))
 #define erf_c15  _mm256_set1_pd((0x1.c6a4181da4ef76f22bd39bb5dcp-25))

-#define ERF_UBOUND    (0x407AD447)  // 3.402823466E+38F
+#define ERF_UBOUND    3.9192059040069580078125f

 //Trignometric EXP, TANH and ERF functions for AVX2

--- a/kernels/zen4/lpgemm/math_utils_avx512.h
+++ b/kernels/zen4/lpgemm/math_utils_avx512.h
@@ -4,7 +4,7 @@
   An object-based framework for developing high-performance BLAS-like
   libraries.

-   Copyright (C) 2023 - 2024, Advanced Micro Devices, Inc. All rights reserved.
+   Copyright (C) 2023 - 2025, Advanced Micro Devices, Inc. All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
@@ -105,8 +105,8 @@
 #define erf512_c14  _mm512_set1_pd((-0x1.155445e2e006723066d72d22ddcp-20))
 #define erf512_c15  _mm512_set1_pd((0x1.c6a4181da4ef76f22bd39bb5dcp-25))

-#define ERF512_UBOUND    (0x407AD447)  // 3.402823466E+38F
-#define ERF512_BOUND     _mm512_set1_ps((float)(3.91920638084411621F))
+#define ERF512_UBOUND    3.9192059040069580078125f
+#define ERF512_BOUND     _mm512_set1_ps((float)(ERF512_UBOUND))

 typedef union {
    float    f;
--- a/kernels/zen4/lpgemm/s8s8s32/lpgemv_m_kernel_s8_grp_amd512vnni.c
+++ b/kernels/zen4/lpgemm/s8s8s32/lpgemv_m_kernel_s8_grp_amd512vnni.c
@@ -144,12 +144,6 @@ LPGEMV_M_EQ1_KERN2(int8_t,int8_t,int32_t,s8s8s32os32_sym_quant)
                dim_t k_iter = kg0 / 16;
                dim_t k_rem = k_full_pieces % 4;

-                dim_t kg0_updated = kg0;
-                if ( k_partial_pieces > 0 )
-                {
-                    kg0_updated += ( 4 - k_partial_pieces );
-                }
-
                b_use = b_pc + ( g_id * NR * group_size );

                // zero the accumulator registers
--- a/kernels/zen4/lpgemm/s8s8s32/lpgemv_n_kernel_s8_grp_amd512vnni.c
+++ b/kernels/zen4/lpgemm/s8s8s32/lpgemv_n_kernel_s8_grp_amd512vnni.c
@@ -373,7 +373,6 @@ LPGEMV_N_EQ1_KERN2(int8_t,int8_t,int32_t,s8s8s32os32_sym_quant)
        else    // Handle M fringe cases when mr0 < MR.
        {
            const int8_t* a_use_fringe = a_use;
-            dim_t regidx = 0;

            // Dot-product kernel for m_fringe >= 8; [8, 16).
            if ( mr0_use >= 8 )
@@ -468,8 +467,6 @@ LPGEMV_N_EQ1_KERN2(int8_t,int8_t,int32_t,s8s8s32os32_sym_quant)
                    zmm8 = _mm512_inserti32x4( zmm8, xmm0, 0 );
                    zmm8 = _mm512_inserti32x4( zmm8, xmm1, 1 );

-                    // regidx = 2;
-
                    int32_t* bsumptr = post_ops_attr.b_col_sum_vec + group;

                    zmm0 = _mm512_set1_epi32( *bsumptr );
@@ -628,8 +625,6 @@ LPGEMV_N_EQ1_KERN2(int8_t,int8_t,int32_t,s8s8s32os32_sym_quant)
                                    ymm5, ymm1, ymm2, ymm3, xmm2 )

                    // Compose outputs into one zmm to perform post-ops
-                    // if( regidx == 0 ) zmm8 = _mm512_inserti32x4( zmm8, xmm2, 0 );
-                    // else zmm8 = _mm512_inserti32x4( zmm8, xmm2, 2 );
                    zmm8 = _mm512_inserti32x4( zmm8, xmm2, 0 );

                    int32_t* bsumptr = post_ops_attr.b_col_sum_vec + group;
@@ -715,7 +710,6 @@ LPGEMV_N_EQ1_KERN2(int8_t,int8_t,int32_t,s8s8s32os32_sym_quant)
                    f32_acc0 = _mm512_maskz_add_ps(k2, f32_acc0, inter0 );
                }   // group loop

-                regidx++;
                a_use = a_use_fringe + 4 * rs_a;
                a_use_fringe = a_use;
                b_use = b;
@@ -889,7 +883,6 @@ LPGEMV_N_EQ1_KERN2(int8_t,int8_t,int32_t,s8s8s32os32_sym_quant)
                a_use = a_use_fringe + 2 * rs_a;
                a_use_fringe = a_use;
                b_use = b;
-                regidx++;
            }
            else if ( mr0_use == 1 )
            {
@@ -1045,7 +1038,6 @@ LPGEMV_N_EQ1_KERN2(int8_t,int8_t,int32_t,s8s8s32os32_sym_quant)
                a_use = a_use_fringe + 1 * rs_a;
                a_use_fringe = a_use;
                b_use = b;
-                regidx++;
            }
        }