mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
AVX2 dgemm kernel optimization for AOCC
Details: k0 is always positive in bli_dgemm_haswell_asm_6x8(), so the operations involving
k0 now cast it to uint64_t to enable AOCC to generate optimized code.
Thanks to Jini Susan (jinisusan.george@amd.com) from the compiler team for suggesting
this change. A similar change was applied to the sgemm, cgemm and zgemm kernels.
Change-Id: I423c949e0c1835652142a6931dadf4a7d190aeb9
This commit is contained in:
@@ -5,7 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2018 - 2022, Advanced Micro Devices, Inc.All rights reserved.
|
||||
Copyright (C) 2018 - 2023, Advanced Micro Devices, Inc.All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -95,8 +95,8 @@ void bli_sgemm_haswell_asm_6x16
|
||||
|
||||
// Typecast local copies of integers in case dim_t and inc_t are a
|
||||
// different size than is expected by load instructions.
|
||||
uint64_t k_iter = k0 / 4;
|
||||
uint64_t k_left = k0 % 4;
|
||||
uint64_t k_iter = (uint64_t)k0 / 4;
|
||||
uint64_t k_left = (uint64_t)k0 % 4;
|
||||
uint64_t rs_c = rs_c0;
|
||||
uint64_t cs_c = cs_c0;
|
||||
|
||||
@@ -957,8 +957,8 @@ void bli_dgemm_haswell_asm_6x8
|
||||
|
||||
// Typecast local copies of integers in case dim_t and inc_t are a
|
||||
// different size than is expected by load instructions.
|
||||
uint64_t k_iter = k0 / 4;
|
||||
uint64_t k_left = k0 % 4;
|
||||
uint64_t k_iter = (uint64_t)k0/4;
|
||||
uint64_t k_left = (uint64_t)k0%4;
|
||||
uint64_t rs_c = rs_c0;
|
||||
uint64_t cs_c = cs_c0;
|
||||
|
||||
@@ -1720,8 +1720,8 @@ void bli_cgemm_haswell_asm_3x8
|
||||
|
||||
// Typecast local copies of integers in case dim_t and inc_t are a
|
||||
// different size than is expected by load instructions.
|
||||
uint64_t k_iter = k0 / 4;
|
||||
uint64_t k_left = k0 % 4;
|
||||
uint64_t k_iter = (uint64_t)k0 / 4;
|
||||
uint64_t k_left = (uint64_t)k0 % 4;
|
||||
uint64_t rs_c = rs_c0;
|
||||
uint64_t cs_c = cs_c0;
|
||||
|
||||
@@ -2249,8 +2249,8 @@ void bli_zgemm_haswell_asm_3x4
|
||||
|
||||
// Typecast local copies of integers in case dim_t and inc_t are a
|
||||
// different size than is expected by load instructions.
|
||||
uint64_t k_iter = k0 / 4;
|
||||
uint64_t k_left = k0 % 4;
|
||||
uint64_t k_iter = (uint64_t)k0 / 4;
|
||||
uint64_t k_left = (uint64_t)k0 % 4;
|
||||
uint64_t rs_c = rs_c0;
|
||||
uint64_t cs_c = cs_c0;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user