Fixed copy-paste bug in bli_spackm_6xk_bb4_ref().

Details:
- Fixed a copy-paste bug in the new bli_spackm_6xk_bb4_ref() that
  manifested as failures in single-precision real level-3 operations.
  Also replaced the duplication factor constants with a const-qualified
  variable, dfac, so that this won't happen again.
- Changed NC for single-precision real from 4080 to 8160 so that the
  packed matrix B will have the same byte footprint in both single
  and double real.
This commit is contained in:
Field G. Van Zee
2019-11-14 13:05:28 -06:00
parent 8f399c8940
commit fb8bef9982
2 changed files with 13 additions and 9 deletions

View File

@@ -192,7 +192,7 @@ void bli_cntx_init_haswell( cntx_t* cntx )
24, 12, 6, 6 );
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 72, 36 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 2076 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 8160, 4080, 4080, 2076 );
#endif
bli_blksz_init_easy( &blkszs[ BLIS_AF ], 8, 8, 8, 8 );
bli_blksz_init_easy( &blkszs[ BLIS_DF ], 8, 8, 8, 8 );

View File

@@ -55,6 +55,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
const dim_t dfac = 2; \
\
/* Handle the packing of B (column panel schemas) separately from packing
of A (row panel schemas). */ \
@@ -162,7 +164,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
n, \
kappa, \
a, inca, lda, \
p, 2, ldp \
p, dfac, ldp \
); \
\
/* if ( cdim < mnr ) */ \
@@ -171,13 +173,13 @@ void PASTEMAC3(ch,opname,arch,suf) \
const dim_t m_edge = mnr - cdim; \
const dim_t n_edge = n_max; \
ctype* restrict p_cast = p; \
ctype* restrict p_edge = p_cast + (i )*2; \
ctype* restrict p_edge = p_cast + (i )*dfac; \
\
PASTEMAC(ch,set0bbs_mxn) \
( \
m_edge, \
n_edge, \
p_edge, 2, ldp \
p_edge, dfac, ldp \
); \
} \
} \
@@ -194,7 +196,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
m_edge, \
n_edge, \
p_edge, 2, ldp \
p_edge, dfac, ldp \
); \
} \
} \
@@ -339,6 +341,8 @@ void PASTEMAC3(ch,opname,arch,suf) \
ctype* restrict kappa_cast = kappa; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
\
const dim_t dfac = 4; \
\
/* Handle the packing of B (column panel schemas) separately from packing
of A (row panel schemas). */ \
@@ -494,7 +498,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
n, \
kappa, \
a, inca, lda, \
p, 4, ldp \
p, dfac, ldp \
); \
\
/* if ( cdim < mnr ) */ \
@@ -503,13 +507,13 @@ void PASTEMAC3(ch,opname,arch,suf) \
const dim_t m_edge = mnr - cdim; \
const dim_t n_edge = n_max; \
ctype* restrict p_cast = p; \
ctype* restrict p_edge = p_cast + (i )*2; \
ctype* restrict p_edge = p_cast + (i )*dfac; \
\
PASTEMAC(ch,set0bbs_mxn) \
( \
m_edge, \
n_edge, \
p_edge, 4, ldp \
p_edge, dfac, ldp \
); \
} \
} \
@@ -526,7 +530,7 @@ void PASTEMAC3(ch,opname,arch,suf) \
( \
m_edge, \
n_edge, \
p_edge, 4, ldp \
p_edge, dfac, ldp \
); \
} \
} \