Use PASTEMAC in macro-kernels (instead of PASTEMAC2 or PASTEMAC3).

Details:
- Replaced multi-type invocations of copys_mxn, xpbys_mxn, etc. (PASTEMAC2
  and PASTEMAC3) with those that only use a single type (PASTEMAC).
- Added extra macros to bli_adds_mxn_uplo.h and bli_xpbys_mxn_uplo.h to
  accommodate above change.
- Fixed comment typo in bli_config.h files.
- Added .nfs* pattern to .gitignore.
This commit is contained in:
Field G. Van Zee
2013-04-25 12:06:12 -05:00
parent df80acf517
commit b6e24b23cb
13 changed files with 132 additions and 65 deletions

2
.gitignore vendored
View File

@@ -4,6 +4,8 @@
*~
# vim backup files
*.swp
# NFS file
.nfs*
# -- compiler-related --

View File

@@ -77,7 +77,7 @@
#define BLIS_DEFAULT_KC_Z 256
#define BLIS_DEFAULT_NC_Z 2048
// -- Ccache blocksize extensions (for optimizing edge cases) --
// -- Cache blocksize extensions (for optimizing edge cases) --
// NOTE: These cache blocksize "extensions" have the same constraints as
// the corresponding default blocksizes above.

View File

@@ -77,7 +77,7 @@
#define BLIS_DEFAULT_KC_Z 256
#define BLIS_DEFAULT_NC_Z 2048
// -- Ccache blocksize extensions (for optimizing edge cases) --
// -- Cache blocksize extensions (for optimizing edge cases) --
// NOTE: These cache blocksize "extensions" have the same constraints as
// the corresponding default blocksizes above.

View File

@@ -77,7 +77,7 @@
#define BLIS_DEFAULT_KC_Z 256
#define BLIS_DEFAULT_NC_Z 2048
// -- Ccache blocksize extensions (for optimizing edge cases) --
// -- Cache blocksize extensions (for optimizing edge cases) --
// NOTE: These cache blocksize "extensions" have the same constraints as
// the corresponding default blocksizes above.

View File

@@ -291,10 +291,10 @@ void PASTEMAC(ch,varname)( \
a2, b2 ); \
\
/* Scale the bottom edge of C and add the result from above. */ \
PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_left, NR, \
ct, rs_ct, cs_ct, \
beta_cast, \
c11, rs_c, cs_c ); \
PASTEMAC(ch,xpbys_mxn)( m_left, NR, \
ct, rs_ct, cs_ct, \
beta_cast, \
c11, rs_c, cs_c ); \
} \
\
b1 += cstep_b; \
@@ -335,10 +335,10 @@ void PASTEMAC(ch,varname)( \
a2, b2 ); \
\
/* Scale the right edge of C and add the result from above. */ \
PASTEMAC3(ch,ch,ch,xpbys_mxn)( MR, n_left, \
ct, rs_ct, cs_ct, \
beta_cast, \
c11, rs_c, cs_c ); \
PASTEMAC(ch,xpbys_mxn)( MR, n_left, \
ct, rs_ct, cs_ct, \
beta_cast, \
c11, rs_c, cs_c ); \
\
a1 += rstep_a; \
c11 += rstep_c; \
@@ -361,10 +361,10 @@ void PASTEMAC(ch,varname)( \
a2, b2 ); \
\
/* Scale the bottom-right corner of C and add the result from above. */ \
PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_left, n_left, \
ct, rs_ct, cs_ct, \
beta_cast, \
c11, rs_c, cs_c ); \
PASTEMAC(ch,xpbys_mxn)( m_left, n_left, \
ct, rs_ct, cs_ct, \
beta_cast, \
c11, rs_c, cs_c ); \
} \
} \
\

View File

@@ -294,11 +294,11 @@ void PASTEMAC(ch,varname)( \
a2, b2 ); \
\
/* Scale C and add the result to only the stored part. */ \
PASTEMAC3(ch,ch,ch,xpbys_mxn_l)( diagoffc_ij, \
m_cur, n_cur, \
ct, rs_ct, cs_ct, \
beta_cast, \
c11, rs_c, cs_c ); \
PASTEMAC(ch,xpbys_mxn_l)( diagoffc_ij, \
m_cur, n_cur, \
ct, rs_ct, cs_ct, \
beta_cast, \
c11, rs_c, cs_c ); \
} \
else if ( bli_is_strictly_below_diag_n( diagoffc_ij, m_cur, n_cur ) ) \
{ \
@@ -326,10 +326,10 @@ void PASTEMAC(ch,varname)( \
a2, b2 ); \
\
/* Scale the edge of C and add the result. */ \
PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
beta_cast, \
c11, rs_c, cs_c ); \
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
beta_cast, \
c11, rs_c, cs_c ); \
} \
} \
\

View File

@@ -294,11 +294,11 @@ void PASTEMAC(ch,varname)( \
a2, b2 ); \
\
/* Scale C and add the result to only the stored part. */ \
PASTEMAC3(ch,ch,ch,xpbys_mxn_u)( diagoffc_ij, \
m_cur, n_cur, \
ct, rs_ct, cs_ct, \
beta_cast, \
c11, rs_c, cs_c ); \
PASTEMAC(ch,xpbys_mxn_u)( diagoffc_ij, \
m_cur, n_cur, \
ct, rs_ct, cs_ct, \
beta_cast, \
c11, rs_c, cs_c ); \
} \
else if ( bli_is_strictly_above_diag_n( diagoffc_ij, m_cur, n_cur ) ) \
{ \
@@ -326,10 +326,10 @@ void PASTEMAC(ch,varname)( \
a2, b2 ); \
\
/* Scale the edge of C and add the result. */ \
PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
beta_cast, \
c11, rs_c, cs_c ); \
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
beta_cast, \
c11, rs_c, cs_c ); \
} \
} \
\

View File

@@ -333,9 +333,9 @@ void PASTEMAC(ch,varname)( \
else \
{ \
/* Copy edge elements of C to the temporary buffer. */ \
PASTEMAC2(ch,ch,copys_mxn)( m_cur, n_cur, \
c11, rs_c, cs_c, \
ct, rs_ct, cs_ct ); \
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
c11, rs_c, cs_c, \
ct, rs_ct, cs_ct ); \
\
/* Invoke the gemm micro-kernel. */ \
PASTEMAC(ch,ukrname)( k_a1011, \
@@ -347,9 +347,9 @@ void PASTEMAC(ch,varname)( \
a2, b2 ); \
\
/* Copy the result to the edge of C. */ \
PASTEMAC2(ch,ch,copys_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
c11, rs_c, cs_c ); \
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
c11, rs_c, cs_c ); \
} \
\
a1 += k_a1011 * PACKMR; \
@@ -390,9 +390,9 @@ void PASTEMAC(ch,varname)( \
a2, b2 ); \
\
/* Add the result to the edge of C. */ \
PASTEMAC2(ch,ch,adds_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
c11, rs_c, cs_c ); \
PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
c11, rs_c, cs_c ); \
} \
\
a1 += rstep_a; \

View File

@@ -333,9 +333,9 @@ void PASTEMAC(ch,varname)( \
else \
{ \
/* Copy edge elements of C to the temporary buffer. */ \
PASTEMAC2(ch,ch,copys_mxn)( m_cur, n_cur, \
c11, rs_c, cs_c, \
ct, rs_ct, cs_ct ); \
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
c11, rs_c, cs_c, \
ct, rs_ct, cs_ct ); \
\
/* Invoke the gemm micro-kernel. */ \
PASTEMAC(ch,ukrname)( k_a1112, \
@@ -347,9 +347,9 @@ void PASTEMAC(ch,varname)( \
a2, b2 ); \
\
/* Copy the result to the edge of C. */ \
PASTEMAC2(ch,ch,copys_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
c11, rs_c, cs_c ); \
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
c11, rs_c, cs_c ); \
} \
\
a1 += k_a1112 * PACKMR; \
@@ -390,9 +390,9 @@ void PASTEMAC(ch,varname)( \
a2, b2 ); \
\
/* Add the result to the edge of C. */ \
PASTEMAC2(ch,ch,adds_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
c11, rs_c, cs_c ); \
PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
c11, rs_c, cs_c ); \
} \
\
a1 += rstep_a; \

View File

@@ -361,9 +361,9 @@ void PASTEMAC(ch,varname)( \
a2, b2 ); \
\
/* Copy the result to the bottom edge of C. */ \
PASTEMAC2(ch,ch,copys_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
c11, rs_c, cs_c ); \
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
c11, rs_c, cs_c ); \
} \
\
a1 += k_a1011 * PACKMR; \
@@ -404,10 +404,10 @@ void PASTEMAC(ch,varname)( \
a2, b2 ); \
\
/* Add the result to the edge of C. */ \
PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
alpha_cast, \
c11, rs_c, cs_c ); \
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
alpha_cast, \
c11, rs_c, cs_c ); \
} \
\
a1 += rstep_a; \

View File

@@ -366,9 +366,9 @@ void PASTEMAC(ch,varname)( \
a2, b2 ); \
\
/* Copy the result to the bottom edge of C. */ \
PASTEMAC2(ch,ch,copys_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
c11, rs_c, cs_c ); \
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
c11, rs_c, cs_c ); \
} \
\
a1 += k_a1112 * PACKMR; \
@@ -409,10 +409,10 @@ void PASTEMAC(ch,varname)( \
a2, b2 ); \
\
/* Add the result to the edge of C. */ \
PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
alpha_cast, \
c11, rs_c, cs_c ); \
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
ct, rs_ct, cs_ct, \
alpha_cast, \
c11, rs_c, cs_c ); \
} \
\
a1 += rstep_a; \

View File

@@ -177,4 +177,37 @@
}
/*
   Single-datatype front ends for adds_mxn_u.

   Each macro takes one type character (s, d, c, z) and forwards all of its
   arguments, unchanged and in the same order, to the corresponding
   two-datatype macro in which both type characters are the same
   (ss, dd, cc, zz). This lets callers name the operation with a single
   type prefix.
*/

/* s: forwards to bli_ssadds_mxn_u. */
#define bli_sadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \
{ \
	bli_ssadds_mxn_u( diagoff, m, n, \
	                  x, rs_x, cs_x, \
	                  y, rs_y, cs_y ); \
}

/* d: forwards to bli_ddadds_mxn_u. */
#define bli_dadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \
{ \
	bli_ddadds_mxn_u( diagoff, m, n, \
	                  x, rs_x, cs_x, \
	                  y, rs_y, cs_y ); \
}

/* c: forwards to bli_ccadds_mxn_u. */
#define bli_cadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \
{ \
	bli_ccadds_mxn_u( diagoff, m, n, \
	                  x, rs_x, cs_x, \
	                  y, rs_y, cs_y ); \
}

/* z: forwards to bli_zzadds_mxn_u. */
#define bli_zadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \
{ \
	bli_zzadds_mxn_u( diagoff, m, n, \
	                  x, rs_x, cs_x, \
	                  y, rs_y, cs_y ); \
}
/*
   Single-datatype front ends for adds_mxn_l.

   Each macro takes one type character (s, d, c, z) and forwards all of its
   arguments, unchanged and in the same order, to the corresponding
   two-datatype macro in which both type characters are the same
   (ss, dd, cc, zz). This lets callers name the operation with a single
   type prefix.
*/

/* s: forwards to bli_ssadds_mxn_l. */
#define bli_sadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \
{ \
	bli_ssadds_mxn_l( diagoff, m, n, \
	                  x, rs_x, cs_x, \
	                  y, rs_y, cs_y ); \
}

/* d: forwards to bli_ddadds_mxn_l. */
#define bli_dadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \
{ \
	bli_ddadds_mxn_l( diagoff, m, n, \
	                  x, rs_x, cs_x, \
	                  y, rs_y, cs_y ); \
}

/* c: forwards to bli_ccadds_mxn_l. */
#define bli_cadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \
{ \
	bli_ccadds_mxn_l( diagoff, m, n, \
	                  x, rs_x, cs_x, \
	                  y, rs_y, cs_y ); \
}

/* z: forwards to bli_zzadds_mxn_l. */
#define bli_zadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \
{ \
	bli_zzadds_mxn_l( diagoff, m, n, \
	                  x, rs_x, cs_x, \
	                  y, rs_y, cs_y ); \
}
#endif

View File

@@ -184,5 +184,37 @@
}
/*
   Single-datatype front ends for xpbys_mxn_u.

   Each macro takes one type character (s, d, c, z) and forwards all of its
   arguments, unchanged and in the same order, to the corresponding
   three-datatype macro in which all three type characters are the same
   (sss, ddd, ccc, zzz). This lets callers name the operation with a single
   type prefix.
*/

/* s: forwards to bli_sssxpbys_mxn_u. */
#define bli_sxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
{ \
	bli_sssxpbys_mxn_u( diagoff, m, n, \
	                    x, rs_x, cs_x, \
	                    beta, \
	                    y, rs_y, cs_y ); \
}

/* d: forwards to bli_dddxpbys_mxn_u. */
#define bli_dxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
{ \
	bli_dddxpbys_mxn_u( diagoff, m, n, \
	                    x, rs_x, cs_x, \
	                    beta, \
	                    y, rs_y, cs_y ); \
}

/* c: forwards to bli_cccxpbys_mxn_u. */
#define bli_cxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
{ \
	bli_cccxpbys_mxn_u( diagoff, m, n, \
	                    x, rs_x, cs_x, \
	                    beta, \
	                    y, rs_y, cs_y ); \
}

/* z: forwards to bli_zzzxpbys_mxn_u. */
#define bli_zxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
{ \
	bli_zzzxpbys_mxn_u( diagoff, m, n, \
	                    x, rs_x, cs_x, \
	                    beta, \
	                    y, rs_y, cs_y ); \
}
/*
   Single-datatype front ends for xpbys_mxn_l.

   Each macro takes one type character (s, d, c, z) and forwards all of its
   arguments, unchanged and in the same order, to the corresponding
   three-datatype macro in which all three type characters are the same
   (sss, ddd, ccc, zzz). This lets callers name the operation with a single
   type prefix.
*/

/* s: forwards to bli_sssxpbys_mxn_l. */
#define bli_sxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
{ \
	bli_sssxpbys_mxn_l( diagoff, m, n, \
	                    x, rs_x, cs_x, \
	                    beta, \
	                    y, rs_y, cs_y ); \
}

/* d: forwards to bli_dddxpbys_mxn_l. */
#define bli_dxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
{ \
	bli_dddxpbys_mxn_l( diagoff, m, n, \
	                    x, rs_x, cs_x, \
	                    beta, \
	                    y, rs_y, cs_y ); \
}

/* c: forwards to bli_cccxpbys_mxn_l. */
#define bli_cxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
{ \
	bli_cccxpbys_mxn_l( diagoff, m, n, \
	                    x, rs_x, cs_x, \
	                    beta, \
	                    y, rs_y, cs_y ); \
}

/* z: forwards to bli_zzzxpbys_mxn_l. */
#define bli_zxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
{ \
	bli_zzzxpbys_mxn_l( diagoff, m, n, \
	                    x, rs_x, cs_x, \
	                    beta, \
	                    y, rs_y, cs_y ); \
}
#endif