mirror of
https://github.com/amd/blis.git
synced 2026-04-19 23:28:52 +00:00
Skip clearing temp microtile in gemmlike sandbox.
Details:
- Removed code from gemmlike sandbox files bls_gemm_bp_var1.c and
bls_gemm_bp_var2.c that initializes the elements of the temporary
microtile to zero. This code, introduced recently in 7f7d726, did
not actually fix any bug (despite that commit's log entry). The
microtile does not need to be initialized because it is completely
overwritten by a "beta = 0" invocation of gemm prior to it being
read. Any NaNs or Infs present at the outset would have no impact
on the output matrix C. Thanks to Devin Matthews for reminding me
of this.
This commit is contained in:
@@ -230,9 +230,6 @@ void PASTECH2(bls_,ch,varname) \
 thrinfo_t* restrict thread_pa = NULL; \
 thrinfo_t* restrict thread_jr = NULL; \
 thrinfo_t* restrict thread_ir = NULL; \
 \
-/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
-PASTEMAC(ch,set0s_mxn)( MR, NR, ct, rs_ct, cs_ct ); \
-\
 /* Identify the current thrinfo_t node and then grow the tree. */ \
 thread_jc = thread; \
@@ -538,12 +538,6 @@ void PASTECH2(bls_,ch,varname) \
 const inc_t cs_ct = ( col_pref ? MR : 1 ); \
 \
 ctype zero = *PASTEMAC(ch,0); \
 \
-/* Clear the temporary C buffer in case it has any infs or NaNs.
-NOTE: This initialization should really be done statically since
-var2 executes this microkernel wrapper many times, and the overhead
-of touching the temporary microtile adds up. */ \
-PASTEMAC(ch,set0s_mxn)( MR, NR, ct, rs_ct, cs_ct ); \
-\
 /* Handle interior and edge cases separately. */ \
 if ( mr_cur == MR && nr_cur == NR ) \
Reference in New Issue
Block a user