From d8d4499e54ea14dc09f7e4cd6d2a6880d7d07f59 Mon Sep 17 00:00:00 2001 From: Kiran Varaganti Date: Mon, 14 Nov 2022 07:14:24 +0000 Subject: [PATCH] AVX2 dgemm kernel optimization for AOCC Details: k0 is always positive in bli_dgemm_haswell_asm_6x8(); the operation involving k0 is typecast to uint64_t to enable AOCC to generate optimized code. Thanks to Jini Susan (jinisusan.george@amd.com) from the compiler team for suggesting this change. A similar change was applied to sgemm, cgemm and zgemm kernels. Change-Id: I423c949e0c1835652142a6931dadf4a7d190aeb9 --- kernels/haswell/3/bli_gemm_haswell_asm_d6x8.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/kernels/haswell/3/bli_gemm_haswell_asm_d6x8.c b/kernels/haswell/3/bli_gemm_haswell_asm_d6x8.c index 79625519c..1515f292e 100644 --- a/kernels/haswell/3/bli_gemm_haswell_asm_d6x8.c +++ b/kernels/haswell/3/bli_gemm_haswell_asm_d6x8.c @@ -5,7 +5,7 @@ libraries. Copyright (C) 2014, The University of Texas at Austin - Copyright (C) 2018 - 2022, Advanced Micro Devices, Inc.All rights reserved. + Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc.All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are @@ -95,8 +95,8 @@ void bli_sgemm_haswell_asm_6x16 // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. - uint64_t k_iter = k0 / 4; - uint64_t k_left = k0 % 4; + uint64_t k_iter = (uint64_t)k0 / 4; + uint64_t k_left = (uint64_t)k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; @@ -957,8 +957,8 @@ void bli_dgemm_haswell_asm_6x8 // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. 
- uint64_t k_iter = k0 / 4; - uint64_t k_left = k0 % 4; + uint64_t k_iter = (uint64_t)k0/4; + uint64_t k_left = (uint64_t)k0%4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; @@ -1720,8 +1720,8 @@ void bli_cgemm_haswell_asm_3x8 // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. - uint64_t k_iter = k0 / 4; - uint64_t k_left = k0 % 4; + uint64_t k_iter = (uint64_t)k0 / 4; + uint64_t k_left = (uint64_t)k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0; @@ -2249,8 +2249,8 @@ void bli_zgemm_haswell_asm_3x4 // Typecast local copies of integers in case dim_t and inc_t are a // different size than is expected by load instructions. - uint64_t k_iter = k0 / 4; - uint64_t k_left = k0 % 4; + uint64_t k_iter = (uint64_t)k0 / 4; + uint64_t k_left = (uint64_t)k0 % 4; uint64_t rs_c = rs_c0; uint64_t cs_c = cs_c0;