Minor tweaks to gemmlike sandbox.

Details:
- In the gemmlike sandbox, changed the loop index variable of the inner
  loop of packm_cxk() from 'd' to 'i' (and likewise for the
  corresponding inlined code within packm_var2()).
- Pack matrices A and B using packm_var1() instead of packm_var2().
This commit is contained in:
Field G. Van Zee
2021-08-19 16:06:46 -05:00
parent 3eccfd456e
commit 3b275f810b
4 changed files with 18 additions and 18 deletions

View File

@@ -300,7 +300,7 @@ void PASTECH2(bls_,ch,opname) \
\
/* Pack matrix A to the destination buffer chosen above. Here, the packed
matrix is stored to column-stored MR x k micropanels. */ \
-PASTECH2(bls_,ch,packm_var2) \
+PASTECH2(bls_,ch,packm_var1) \
( \
conj, \
schema, \

View File

@@ -300,7 +300,7 @@ void PASTECH2(bls_,ch,opname) \
\
/* Pack matrix B to the destination buffer chosen above. Here, the packed
matrix is stored to row-stored k x NR micropanels. */ \
-PASTECH2(bls_,ch,packm_var2) \
+PASTECH2(bls_,ch,packm_var1) \
( \
conj, \
schema, \

View File

@@ -167,12 +167,12 @@ void PASTECH2(bls_,ch,varname) \
{ \
for ( dim_t l = 0; l < panel_len; ++l ) \
{ \
-for ( dim_t d = 0; d < panel_dim; ++d ) \
+for ( dim_t i = 0; i < panel_dim; ++i ) \
{ \
-ctype* cld = c_use + (l )*ldc + (d )*incc; \
-ctype* pld = p_use + (l )*ldp + (d )*1; \
+ctype* cli = c_use + (l )*ldc + (i )*incc; \
+ctype* pli = p_use + (l )*ldp + (i )*1; \
\
-PASTEMAC(ch,copyjs)( *cld, *pld ); \
+PASTEMAC(ch,copyjs)( *cli, *pli ); \
} \
} \
} \
@@ -180,12 +180,12 @@ void PASTECH2(bls_,ch,varname) \
{ \
for ( dim_t l = 0; l < panel_len; ++l ) \
{ \
-for ( dim_t d = 0; d < panel_dim; ++d ) \
+for ( dim_t i = 0; i < panel_dim; ++i ) \
{ \
-ctype* cld = c_use + (l )*ldc + (d )*incc; \
-ctype* pld = p_use + (l )*ldp + (d )*1; \
+ctype* cli = c_use + (l )*ldc + (i )*incc; \
+ctype* pli = p_use + (l )*ldp + (i )*1; \
\
-PASTEMAC(ch,copys)( *cld, *pld ); \
+PASTEMAC(ch,copys)( *cli, *pli ); \
} \
} \
} \

View File

@@ -96,12 +96,12 @@ void PASTECH2(bls_,ch,opname) \
{ \
for ( dim_t l = 0; l < panel_len; ++l ) \
{ \
-for ( dim_t d = 0; d < panel_dim; ++d ) \
+for ( dim_t i = 0; i < panel_dim; ++i ) \
{ \
-ctype* ald = a + (l )*lda + (d )*inca; \
-ctype* pld = p + (l )*ldp + (d )*1; \
+ctype* ali = a + (l )*lda + (i )*inca; \
+ctype* pli = p + (l )*ldp + (i )*1; \
\
-PASTEMAC(ch,copyjs)( *ald, *pld ); \
+PASTEMAC(ch,copyjs)( *ali, *pli ); \
} \
} \
} \
@@ -109,12 +109,12 @@ void PASTECH2(bls_,ch,opname) \
{ \
for ( dim_t l = 0; l < panel_len; ++l ) \
{ \
-for ( dim_t d = 0; d < panel_dim; ++d ) \
+for ( dim_t i = 0; i < panel_dim; ++i ) \
{ \
-ctype* ald = a + (l )*lda + (d )*inca; \
-ctype* pld = p + (l )*ldp + (d )*1; \
+ctype* ali = a + (l )*lda + (i )*inca; \
+ctype* pli = p + (l )*ldp + (i )*1; \
\
-PASTEMAC(ch,copys)( *ald, *pld ); \
+PASTEMAC(ch,copys)( *ali, *pli ); \
} \
} \
} \