mirror of
https://github.com/amd/blis.git
synced 2026-04-19 23:28:52 +00:00
Fixed copy-paste bug in bli_spackm_6xk_bb4_ref().
Details: - Fixed a copy-paste bug in the new bli_spackm_6xk_bb4_ref() that manifested as failures in single-precision real level-3 operations. Also replaced the duplication factor constants with a const-qualified variable, dfac, so that this won't happen again. - Changed NC for single-precision real from 4080 to 8160 so that the packed matrix B will have the same byte footprint in both single and double real.
This commit is contained in:
@@ -192,7 +192,7 @@ void bli_cntx_init_haswell( cntx_t* cntx )
|
||||
24, 12, 6, 6 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 72, 36 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 2076 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 4080, 2076 );
|
||||
#endif
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, 8, 8 );
|
||||
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, 8, 8 );
|
||||
|
||||
@@ -55,6 +55,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
const dim_t dfac = 2; \
|
||||
\
|
||||
/* Handle the packing of B (column panel schemas) separately from packing
|
||||
of A (row panel schemas). */ \
|
||||
@@ -162,7 +164,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
n, \
|
||||
kappa, \
|
||||
a, inca, lda, \
|
||||
p, 2, ldp \
|
||||
p, dfac, ldp \
|
||||
); \
|
||||
\
|
||||
/* if ( cdim < mnr ) */ \
|
||||
@@ -171,13 +173,13 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const dim_t m_edge = mnr - cdim; \
|
||||
const dim_t n_edge = n_max; \
|
||||
ctype* restrict p_cast = p; \
|
||||
ctype* restrict p_edge = p_cast + (i )*2; \
|
||||
ctype* restrict p_edge = p_cast + (i )*dfac; \
|
||||
\
|
||||
PASTEMAC(ch,set0bbs_mxn) \
|
||||
( \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
p_edge, 2, ldp \
|
||||
p_edge, dfac, ldp \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
@@ -194,7 +196,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
p_edge, 2, ldp \
|
||||
p_edge, dfac, ldp \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
@@ -339,6 +341,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
\
|
||||
const dim_t dfac = 4; \
|
||||
\
|
||||
/* Handle the packing of B (column panel schemas) separately from packing
|
||||
of A (row panel schemas). */ \
|
||||
@@ -494,7 +498,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
n, \
|
||||
kappa, \
|
||||
a, inca, lda, \
|
||||
p, 4, ldp \
|
||||
p, dfac, ldp \
|
||||
); \
|
||||
\
|
||||
/* if ( cdim < mnr ) */ \
|
||||
@@ -503,13 +507,13 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
const dim_t m_edge = mnr - cdim; \
|
||||
const dim_t n_edge = n_max; \
|
||||
ctype* restrict p_cast = p; \
|
||||
ctype* restrict p_edge = p_cast + (i )*2; \
|
||||
ctype* restrict p_edge = p_cast + (i )*dfac; \
|
||||
\
|
||||
PASTEMAC(ch,set0bbs_mxn) \
|
||||
( \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
p_edge, 4, ldp \
|
||||
p_edge, dfac, ldp \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
@@ -526,7 +530,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
|
||||
( \
|
||||
m_edge, \
|
||||
n_edge, \
|
||||
p_edge, 4, ldp \
|
||||
p_edge, dfac, ldp \
|
||||
); \
|
||||
} \
|
||||
} \
|
||||
|
||||
Reference in New Issue
Block a user