mirror of
https://github.com/amd/blis.git
synced 2026-04-25 18:18:51 +00:00
Fixed bugs in cpackm kernels, gemmlike code.
Details: - Fixed intermittent bugs in bli_packm_haswell_asm_c3xk.c and bli_packm_haswell_asm_c8xk.c whereby the imaginary component of the kappa scalar was incorrectly loaded at an offset of 8 bytes (instead of 4 bytes) from the real component. This was almost certainly a copy-paste bug carried over from the corresponding zpackm kernels. Thanks to Devin Matthews for bringing this to my attention. - Added missing code to gemmlike sandbox files bls_gemm_bp_var1.c and bls_gemm_bp_var2.c that initializes the elements of the temporary microtile to zero. (This bug was never observed in output but rather noticed analytically. It probably would have also manifested as intermittent failures, this time involving edge cases.) - Minor commented-out/disabled changes to testsuite/src/test_gemm.c relating to debugging. Change-Id: I899e20df203806717fb5270b5f3dd0bf1f685011
This commit is contained in:
committed by
Dipal M Zambare
parent
6d4d6a7514
commit
2a81437bd8
@@ -230,6 +230,9 @@ void PASTECH2(bls_,ch,varname) \
|
||||
thrinfo_t* restrict thread_pa = NULL; \
|
||||
thrinfo_t* restrict thread_jr = NULL; \
|
||||
thrinfo_t* restrict thread_ir = NULL; \
|
||||
\
|
||||
/* Clear the temporary C buffer in case it has any infs or NaNs. */ \
|
||||
PASTEMAC(ch,set0s_mxn)( MR, NR, ct, rs_ct, cs_ct ); \
|
||||
\
|
||||
/* Identify the current thrinfo_t node and then grow the tree. */ \
|
||||
thread_jc = thread; \
|
||||
|
||||
@@ -538,6 +538,12 @@ void PASTECH2(bls_,ch,varname) \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
ctype zero = *PASTEMAC(ch,0); \
|
||||
\
|
||||
/* Clear the temporary C buffer in case it has any infs or NaNs.
|
||||
NOTE: This initialization should really be done statically since
|
||||
var2 executes this microkernel wrapper many times, and the overhead
|
||||
of touching the temporary microtile adds up. */ \
|
||||
PASTEMAC(ch,set0s_mxn)( MR, NR, ct, rs_ct, cs_ct ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( mr_cur == MR && nr_cur == NR ) \
|
||||
|
||||
@@ -176,17 +176,17 @@ void PASTECH2(bls_,ch,varname) \
|
||||
cntx \
|
||||
); \
|
||||
} \
|
||||
\
|
||||
p_begin += ps_p; \
|
||||
\
|
||||
/*
|
||||
if ( row_stored ) \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_sup_var1: b packed", panel_len_max, panel_dim_max, \
|
||||
p_use, rs_p, cs_p, "%5.2f", "" ); \
|
||||
if ( !row_stored ) \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_sup_var1: a packed", panel_dim_max, panel_len_max, \
|
||||
p_use, rs_p, cs_p, "%5.2f", "" ); \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_var1: a packed", panel_dim_max, panel_len_max, \
|
||||
p_use, rs_p, cs_p, "%5.2f", "" ); \
|
||||
else \
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_var1: b packed", panel_len_max, panel_dim_max, \
|
||||
p_use, rs_p, cs_p, "%5.2f", "" ); \
|
||||
*/ \
|
||||
\
|
||||
p_begin += ps_p; \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user