Pacify 'restrict' warning in gemmtrsm4m1 ref ukr.

Details:
- Previously, some versions of gcc would complain that the same
  pointer, one_r, was being passed in for both alpha and beta in the
  fourth call to the real gemm ukernel in bli_gemmtrsm4m1_ref.c. This
  is understandable since the compiler knows that the real gemm ukernel
  qualifies all of its floating-point arguments (including alpha and
  beta) with restrict. A small hack has been inserted into the file
  that defines a new variable to store the value 1.0, which is now used
  in lieu of one_r for beta in the fourth call to the real gemm ukernel,
  which should pacify the compiler now. Thanks to Dave Love for
  reporting this issue (#328) and to Devin Matthews for offering his
  'restrict' expertise.
This commit is contained in:
Field G. Van Zee
2019-08-21 13:22:12 -05:00
parent e8c6281f13
commit 8122f59745
2 changed files with 10 additions and 2 deletions

2
configure vendored
View File

@@ -1530,7 +1530,7 @@ check_compiler_version_ranges()
gcc_older_than_4_9_0='no'
gcc_older_than_6_1_0='no'
echo "${script_name}: checking for consequential version ranges for ${cc} ${cc_version}."
echo "${script_name}: checking ${cc} ${cc_version} against known consequential version ranges."
# gcc
if [ "x${cc_vendor}" = "xgcc" ]; then

View File

@@ -84,6 +84,14 @@ void PASTEMAC3(ch,opname,arch,suf) \
\
ctype_r* restrict one_r = PASTEMAC(chr,1); \
ctype_r* restrict minus_one_r = PASTEMAC(chr,m1); \
\
/* A hack to avoid a 'restrict' warning triggered by passing in the
same address (one_r) for both alpha and beta when calling the last
of the four matrix products. We now use one_r for alpha and this
new local variable, onel, for beta. */ \
ctype onel; \
ctype_r* restrict onel_r = &onel; \
PASTEMAC(chr,set1s)( onel ); \
\
ctype_r alpha_r = PASTEMAC(ch,real)( *alpha ); \
ctype_r alpha_i = PASTEMAC(ch,imag)( *alpha ); \
@@ -187,7 +195,7 @@ PASTEMAC(chr,fprintm)( stdout, "gemmtrsm4m1_l_ukr: bx111p_i", k+m, n, \
one_r, \
a1x_i, \
bx1_i, \
one_r, \
onel_r, \
b11_r, rs_b, cs_b, \
data, \
cntx \