mirror of
https://github.com/amd/blis.git
synced 2026-04-19 23:28:52 +00:00
Use PASTEMAC in macro-kernels (over MAC2 or MAC3).
Details:
- Replaced multi-type invocations of copys_mxn, xpbys_mxn, etc. (PASTEMAC2 and PASTEMAC3) with those that only use a single type (PASTEMAC).
- Added extra macros to bli_adds_mxn_uplo.h and bli_xpbys_mxn_uplo.h to accommodate the above change.
- Fixed comment typo in bli_config.h files.
- Added .nfs* pattern to .gitignore.
This commit is contained in:
2
.gitignore
vendored
2
.gitignore
vendored
@@ -4,6 +4,8 @@
|
||||
*~
|
||||
# vim backup files
|
||||
*.swp
|
||||
# NFS file
|
||||
.nfs*
|
||||
|
||||
# -- compiler-related --
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@
|
||||
#define BLIS_DEFAULT_KC_Z 256
|
||||
#define BLIS_DEFAULT_NC_Z 2048
|
||||
|
||||
// -- Ccache blocksize extensions (for optimizing edge cases) --
|
||||
// -- Cache blocksize extensions (for optimizing edge cases) --
|
||||
|
||||
// NOTE: These cache blocksize "extensions" have the same constraints as
|
||||
// the corresponding default blocksizes above.
|
||||
|
||||
@@ -77,7 +77,7 @@
|
||||
#define BLIS_DEFAULT_KC_Z 256
|
||||
#define BLIS_DEFAULT_NC_Z 2048
|
||||
|
||||
// -- Ccache blocksize extensions (for optimizing edge cases) --
|
||||
// -- Cache blocksize extensions (for optimizing edge cases) --
|
||||
|
||||
// NOTE: These cache blocksize "extensions" have the same constraints as
|
||||
// the corresponding default blocksizes above.
|
||||
|
||||
@@ -77,7 +77,7 @@
|
||||
#define BLIS_DEFAULT_KC_Z 256
|
||||
#define BLIS_DEFAULT_NC_Z 2048
|
||||
|
||||
// -- Ccache blocksize extensions (for optimizing edge cases) --
|
||||
// -- Cache blocksize extensions (for optimizing edge cases) --
|
||||
|
||||
// NOTE: These cache blocksize "extensions" have the same constraints as
|
||||
// the corresponding default blocksizes above.
|
||||
|
||||
@@ -291,10 +291,10 @@ void PASTEMAC(ch,varname)( \
|
||||
a2, b2 ); \
|
||||
\
|
||||
/* Scale the bottom edge of C and add the result from above. */ \
|
||||
PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_left, NR, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
PASTEMAC(ch,xpbys_mxn)( m_left, NR, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
} \
|
||||
\
|
||||
b1 += cstep_b; \
|
||||
@@ -335,10 +335,10 @@ void PASTEMAC(ch,varname)( \
|
||||
a2, b2 ); \
|
||||
\
|
||||
/* Scale the right edge of C and add the result from above. */ \
|
||||
PASTEMAC3(ch,ch,ch,xpbys_mxn)( MR, n_left, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
PASTEMAC(ch,xpbys_mxn)( MR, n_left, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
\
|
||||
a1 += rstep_a; \
|
||||
c11 += rstep_c; \
|
||||
@@ -361,10 +361,10 @@ void PASTEMAC(ch,varname)( \
|
||||
a2, b2 ); \
|
||||
\
|
||||
/* Scale the bottom-right corner of C and add the result from above. */ \
|
||||
PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_left, n_left, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
PASTEMAC(ch,xpbys_mxn)( m_left, n_left, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
|
||||
@@ -294,11 +294,11 @@ void PASTEMAC(ch,varname)( \
|
||||
a2, b2 ); \
|
||||
\
|
||||
/* Scale C and add the result to only the stored part. */ \
|
||||
PASTEMAC3(ch,ch,ch,xpbys_mxn_l)( diagoffc_ij, \
|
||||
m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
PASTEMAC(ch,xpbys_mxn_l)( diagoffc_ij, \
|
||||
m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
} \
|
||||
else if ( bli_is_strictly_below_diag_n( diagoffc_ij, m_cur, n_cur ) ) \
|
||||
{ \
|
||||
@@ -326,10 +326,10 @@ void PASTEMAC(ch,varname)( \
|
||||
a2, b2 ); \
|
||||
\
|
||||
/* Scale the edge of C and add the result. */ \
|
||||
PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
|
||||
@@ -294,11 +294,11 @@ void PASTEMAC(ch,varname)( \
|
||||
a2, b2 ); \
|
||||
\
|
||||
/* Scale C and add the result to only the stored part. */ \
|
||||
PASTEMAC3(ch,ch,ch,xpbys_mxn_u)( diagoffc_ij, \
|
||||
m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
PASTEMAC(ch,xpbys_mxn_u)( diagoffc_ij, \
|
||||
m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
} \
|
||||
else if ( bli_is_strictly_above_diag_n( diagoffc_ij, m_cur, n_cur ) ) \
|
||||
{ \
|
||||
@@ -326,10 +326,10 @@ void PASTEMAC(ch,varname)( \
|
||||
a2, b2 ); \
|
||||
\
|
||||
/* Scale the edge of C and add the result. */ \
|
||||
PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
} \
|
||||
} \
|
||||
\
|
||||
|
||||
@@ -333,9 +333,9 @@ void PASTEMAC(ch,varname)( \
|
||||
else \
|
||||
{ \
|
||||
/* Copy edge elements of C to the temporary buffer. */ \
|
||||
PASTEMAC2(ch,ch,copys_mxn)( m_cur, n_cur, \
|
||||
c11, rs_c, cs_c, \
|
||||
ct, rs_ct, cs_ct ); \
|
||||
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
|
||||
c11, rs_c, cs_c, \
|
||||
ct, rs_ct, cs_ct ); \
|
||||
\
|
||||
/* Invoke the gemm micro-kernel. */ \
|
||||
PASTEMAC(ch,ukrname)( k_a1011, \
|
||||
@@ -347,9 +347,9 @@ void PASTEMAC(ch,varname)( \
|
||||
a2, b2 ); \
|
||||
\
|
||||
/* Copy the result to the edge of C. */ \
|
||||
PASTEMAC2(ch,ch,copys_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
c11, rs_c, cs_c ); \
|
||||
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
c11, rs_c, cs_c ); \
|
||||
} \
|
||||
\
|
||||
a1 += k_a1011 * PACKMR; \
|
||||
@@ -390,9 +390,9 @@ void PASTEMAC(ch,varname)( \
|
||||
a2, b2 ); \
|
||||
\
|
||||
/* Add the result to the edge of C. */ \
|
||||
PASTEMAC2(ch,ch,adds_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
c11, rs_c, cs_c ); \
|
||||
PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
c11, rs_c, cs_c ); \
|
||||
} \
|
||||
\
|
||||
a1 += rstep_a; \
|
||||
|
||||
@@ -333,9 +333,9 @@ void PASTEMAC(ch,varname)( \
|
||||
else \
|
||||
{ \
|
||||
/* Copy edge elements of C to the temporary buffer. */ \
|
||||
PASTEMAC2(ch,ch,copys_mxn)( m_cur, n_cur, \
|
||||
c11, rs_c, cs_c, \
|
||||
ct, rs_ct, cs_ct ); \
|
||||
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
|
||||
c11, rs_c, cs_c, \
|
||||
ct, rs_ct, cs_ct ); \
|
||||
\
|
||||
/* Invoke the gemm micro-kernel. */ \
|
||||
PASTEMAC(ch,ukrname)( k_a1112, \
|
||||
@@ -347,9 +347,9 @@ void PASTEMAC(ch,varname)( \
|
||||
a2, b2 ); \
|
||||
\
|
||||
/* Copy the result to the edge of C. */ \
|
||||
PASTEMAC2(ch,ch,copys_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
c11, rs_c, cs_c ); \
|
||||
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
c11, rs_c, cs_c ); \
|
||||
} \
|
||||
\
|
||||
a1 += k_a1112 * PACKMR; \
|
||||
@@ -390,9 +390,9 @@ void PASTEMAC(ch,varname)( \
|
||||
a2, b2 ); \
|
||||
\
|
||||
/* Add the result to the edge of C. */ \
|
||||
PASTEMAC2(ch,ch,adds_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
c11, rs_c, cs_c ); \
|
||||
PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
c11, rs_c, cs_c ); \
|
||||
} \
|
||||
\
|
||||
a1 += rstep_a; \
|
||||
|
||||
@@ -361,9 +361,9 @@ void PASTEMAC(ch,varname)( \
|
||||
a2, b2 ); \
|
||||
\
|
||||
/* Copy the result to the bottom edge of C. */ \
|
||||
PASTEMAC2(ch,ch,copys_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
c11, rs_c, cs_c ); \
|
||||
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
c11, rs_c, cs_c ); \
|
||||
} \
|
||||
\
|
||||
a1 += k_a1011 * PACKMR; \
|
||||
@@ -404,10 +404,10 @@ void PASTEMAC(ch,varname)( \
|
||||
a2, b2 ); \
|
||||
\
|
||||
/* Add the result to the edge of C. */ \
|
||||
PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
alpha_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
alpha_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
} \
|
||||
\
|
||||
a1 += rstep_a; \
|
||||
|
||||
@@ -366,9 +366,9 @@ void PASTEMAC(ch,varname)( \
|
||||
a2, b2 ); \
|
||||
\
|
||||
/* Copy the result to the bottom edge of C. */ \
|
||||
PASTEMAC2(ch,ch,copys_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
c11, rs_c, cs_c ); \
|
||||
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
c11, rs_c, cs_c ); \
|
||||
} \
|
||||
\
|
||||
a1 += k_a1112 * PACKMR; \
|
||||
@@ -409,10 +409,10 @@ void PASTEMAC(ch,varname)( \
|
||||
a2, b2 ); \
|
||||
\
|
||||
/* Add the result to the edge of C. */ \
|
||||
PASTEMAC3(ch,ch,ch,xpbys_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
alpha_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
alpha_cast, \
|
||||
c11, rs_c, cs_c ); \
|
||||
} \
|
||||
\
|
||||
a1 += rstep_a; \
|
||||
|
||||
@@ -177,4 +177,37 @@
|
||||
}
|
||||
|
||||
|
||||
#define bli_sadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \
|
||||
{ \
|
||||
bli_ssadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \
|
||||
}
|
||||
#define bli_dadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \
|
||||
{ \
|
||||
bli_ddadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \
|
||||
}
|
||||
#define bli_cadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \
|
||||
{ \
|
||||
bli_ccadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \
|
||||
}
|
||||
#define bli_zadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \
|
||||
{ \
|
||||
bli_zzadds_mxn_u( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \
|
||||
}
|
||||
#define bli_sadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \
|
||||
{ \
|
||||
bli_ssadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \
|
||||
}
|
||||
#define bli_dadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \
|
||||
{ \
|
||||
bli_ddadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \
|
||||
}
|
||||
#define bli_cadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \
|
||||
{ \
|
||||
bli_ccadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \
|
||||
}
|
||||
#define bli_zadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ) \
|
||||
{ \
|
||||
bli_zzadds_mxn_l( diagoff, m, n, x, rs_x, cs_x, y, rs_y, cs_y ); \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -184,5 +184,37 @@
|
||||
}
|
||||
|
||||
|
||||
#define bli_sxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
|
||||
{\
|
||||
bli_sssxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \
|
||||
}
|
||||
#define bli_dxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
|
||||
{\
|
||||
bli_dddxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \
|
||||
}
|
||||
#define bli_cxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
|
||||
{\
|
||||
bli_cccxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \
|
||||
}
|
||||
#define bli_zxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
|
||||
{\
|
||||
bli_zzzxpbys_mxn_u( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \
|
||||
}
|
||||
#define bli_sxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
|
||||
{\
|
||||
bli_sssxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \
|
||||
}
|
||||
#define bli_dxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
|
||||
{\
|
||||
bli_dddxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \
|
||||
}
|
||||
#define bli_cxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
|
||||
{\
|
||||
bli_cccxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \
|
||||
}
|
||||
#define bli_zxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
|
||||
{\
|
||||
bli_zzzxpbys_mxn_l( diagoff, m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ); \
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user