Introduced auxinfo_t argument to micro-kernels.

Details:
- Removed a_next and b_next arguments to micro-kernels and replaced them
  with a pointer to a new datatype, auxinfo_t, which is simply a struct
  that holds a_next and b_next. The struct may hold other auxiliary
  information that may be useful to a micro-kernel, such as micro-panel
  stride. Micro-kernels may access struct fields via accessor macros
  defined in bli_auxinfo_macro_defs.h.
- Updated all micro-kernel definitions and micro-kernel calls, as well
  as the macro-kernels (which now declare and initialize the structs),
  according to the above change.
This commit is contained in:
Field G. Van Zee
2013-12-19 14:50:11 -06:00
parent 392428dea4
commit a0331fb10a
67 changed files with 779 additions and 564 deletions

View File

@@ -169,6 +169,7 @@ void PASTEMAC(ch,varname)( \
inc_t rstep_a; \
inc_t cstep_b; \
inc_t rstep_c, cstep_c; \
auxinfo_t aux; \
\
/*
Assumptions/assertions:
@@ -207,6 +208,10 @@ void PASTEMAC(ch,varname)( \
\
rstep_c = rs_c * MR; \
cstep_c = cs_c * NR; \
\
/* Save the panel strides of A and B to the auxinfo_t object. */ \
bli_auxinfo_set_ps_a( ps_a, aux ); \
bli_auxinfo_set_ps_b( ps_b, aux ); \
\
b1 = b_cast; \
c1 = c_cast; \
@@ -242,6 +247,11 @@ void PASTEMAC(ch,varname)( \
if ( j == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. */ \
bli_auxinfo_set_next_a( a2, aux ); \
bli_auxinfo_set_next_b( b2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -253,7 +263,7 @@ void PASTEMAC(ch,varname)( \
b1, \
beta_cast, \
c11, rs_c, cs_c, \
a2, b2 ); \
&aux ); \
} \
else \
{ \
@@ -264,7 +274,7 @@ void PASTEMAC(ch,varname)( \
b1, \
zero, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Scale the bottom edge of C and add the result from above. */ \
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \

View File

@@ -179,6 +179,7 @@ void PASTEMAC(ch,varname)( \
inc_t rstep_a; \
inc_t cstep_b; \
inc_t rstep_c, cstep_c; \
auxinfo_t aux; \
\
/*
Assumptions/assertions:
@@ -217,6 +218,10 @@ void PASTEMAC(ch,varname)( \
\
rstep_c = rs_c * MR; \
cstep_c = cs_c * NR; \
\
/* Save the panel strides of A and B to the auxinfo_t object. */ \
bli_auxinfo_set_ps_a( ps_a, aux ); \
bli_auxinfo_set_ps_b( ps_b, aux ); \
\
b1 = b_cast; \
c1 = c_cast; \
@@ -224,6 +229,9 @@ void PASTEMAC(ch,varname)( \
/* Since we pack micro-panels of B incrementally, one at a time, the
address of the next micro-panel of B remains constant. */ \
b2 = bp; \
\
/* Save address of next panel of B to the auxinfo_t object. */ \
bli_auxinfo_set_next_b( b2, aux ); \
\
/* Loop over the n dimension (NR columns at a time). */ \
for ( j = 0; j < n_iter; ++j ) \
@@ -257,6 +265,9 @@ void PASTEMAC(ch,varname)( \
{ \
a2 = a_cast; \
} \
\
/* Save address of next panel of A to the auxinfo_t object. */ \
bli_auxinfo_set_next_a( a2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -268,7 +279,7 @@ void PASTEMAC(ch,varname)( \
bp, \
beta_cast, \
c11, rs_c, cs_c, \
a2, b2 ); \
&aux ); \
} \
else \
{ \
@@ -279,7 +290,7 @@ void PASTEMAC(ch,varname)( \
bp, \
zero, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Scale the bottom edge of C and add the result from above. */ \
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \

View File

@@ -45,8 +45,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict b, \
ctype* restrict beta, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
) \
{ \
const dim_t m = PASTEMAC(ch,mr); \

View File

@@ -46,8 +46,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict b, \
ctype* restrict beta, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( gemm_ref_mxn )

View File

@@ -175,6 +175,7 @@ void PASTEMAC(ch,varname)( \
inc_t rstep_a; \
inc_t cstep_b; \
inc_t rstep_c, cstep_c; \
auxinfo_t aux; \
\
/*
Assumptions/assertions:
@@ -238,6 +239,10 @@ void PASTEMAC(ch,varname)( \
\
rstep_c = rs_c * MR; \
cstep_c = cs_c * NR; \
\
/* Save the panel strides of A and B to the auxinfo_t object. */ \
bli_auxinfo_set_ps_a( ps_a, aux ); \
bli_auxinfo_set_ps_b( ps_b, aux ); \
\
b1 = b_cast; \
c1 = c_cast; \
@@ -276,6 +281,11 @@ void PASTEMAC(ch,varname)( \
if ( j == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. */ \
bli_auxinfo_set_next_a( a2, aux ); \
bli_auxinfo_set_next_b( b2, aux ); \
\
/* If the diagonal intersects the current MR x NR submatrix, we
compute it in the temporary buffer and then add in the elements
@@ -293,7 +303,7 @@ void PASTEMAC(ch,varname)( \
b1, \
zero, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Scale C and add the result to only the stored part. */ \
PASTEMAC(ch,xpbys_mxn_l)( diagoffc_ij, \
@@ -314,7 +324,7 @@ void PASTEMAC(ch,varname)( \
b1, \
beta_cast, \
c11, rs_c, cs_c, \
a2, b2 ); \
&aux ); \
} \
else \
{ \
@@ -325,7 +335,7 @@ void PASTEMAC(ch,varname)( \
b1, \
zero, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Scale the edge of C and add the result. */ \
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \

View File

@@ -175,6 +175,7 @@ void PASTEMAC(ch,varname)( \
inc_t rstep_a; \
inc_t cstep_b; \
inc_t rstep_c, cstep_c; \
auxinfo_t aux; \
\
/*
Assumptions/assertions:
@@ -238,6 +239,10 @@ void PASTEMAC(ch,varname)( \
\
rstep_c = rs_c * MR; \
cstep_c = cs_c * NR; \
\
/* Save the panel strides of A and B to the auxinfo_t object. */ \
bli_auxinfo_set_ps_a( ps_a, aux ); \
bli_auxinfo_set_ps_b( ps_b, aux ); \
\
b1 = b_cast; \
c1 = c_cast; \
@@ -276,6 +281,11 @@ void PASTEMAC(ch,varname)( \
if ( j == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. */ \
bli_auxinfo_set_next_a( a2, aux ); \
bli_auxinfo_set_next_b( b2, aux ); \
\
/* If the diagonal intersects the current MR x NR submatrix, we
compute it in the temporary buffer and then add in the elements
@@ -293,7 +303,7 @@ void PASTEMAC(ch,varname)( \
b1, \
zero, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Scale C and add the result to only the stored part. */ \
PASTEMAC(ch,xpbys_mxn_u)( diagoffc_ij, \
@@ -314,7 +324,7 @@ void PASTEMAC(ch,varname)( \
b1, \
beta_cast, \
c11, rs_c, cs_c, \
a2, b2 ); \
&aux ); \
} \
else \
{ \
@@ -325,7 +335,7 @@ void PASTEMAC(ch,varname)( \
b1, \
zero, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Scale the edge of C and add the result. */ \
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \

View File

@@ -162,6 +162,7 @@ void PASTEMAC(ch,varname)( \
inc_t rstep_a; \
inc_t cstep_b; \
inc_t rstep_c, cstep_c; \
auxinfo_t aux; \
\
/*
Assumptions/assertions:
@@ -224,6 +225,10 @@ void PASTEMAC(ch,varname)( \
\
rstep_c = rs_c * MR; \
cstep_c = cs_c * NR; \
\
/* Save the panel strides of A and B to the auxinfo_t object. */ \
bli_auxinfo_set_ps_a( ps_a, aux ); \
bli_auxinfo_set_ps_b( ps_b, aux ); \
\
b1 = b_cast; \
c1 = c_cast; \
@@ -276,6 +281,11 @@ void PASTEMAC(ch,varname)( \
if ( j == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. */ \
bli_auxinfo_set_next_a( a2, aux ); \
bli_auxinfo_set_next_b( b2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -287,7 +297,7 @@ void PASTEMAC(ch,varname)( \
b1_i, \
beta_cast, \
c11, rs_c, cs_c, \
a2, b2 ); \
&aux ); \
} \
else \
{ \
@@ -303,7 +313,7 @@ void PASTEMAC(ch,varname)( \
b1_i, \
beta_cast, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Copy the result to the edge of C. */ \
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
@@ -326,6 +336,11 @@ void PASTEMAC(ch,varname)( \
if ( j == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. */ \
bli_auxinfo_set_next_a( a2, aux ); \
bli_auxinfo_set_next_b( b2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -337,7 +352,7 @@ void PASTEMAC(ch,varname)( \
b1, \
one, \
c11, rs_c, cs_c, \
a2, b2 ); \
&aux ); \
} \
else \
{ \
@@ -348,7 +363,7 @@ void PASTEMAC(ch,varname)( \
b1, \
zero, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Add the result to the edge of C. */ \
PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \

View File

@@ -162,6 +162,7 @@ void PASTEMAC(ch,varname)( \
inc_t rstep_a; \
inc_t cstep_b; \
inc_t rstep_c, cstep_c; \
auxinfo_t aux; \
\
/*
Assumptions/assertions:
@@ -232,6 +233,10 @@ void PASTEMAC(ch,varname)( \
\
rstep_c = rs_c * MR; \
cstep_c = cs_c * NR; \
\
/* Save the panel strides of A and B to the auxinfo_t object. */ \
bli_auxinfo_set_ps_a( ps_a, aux ); \
bli_auxinfo_set_ps_b( ps_b, aux ); \
\
b1 = b_cast; \
c1 = c_cast; \
@@ -284,6 +289,11 @@ void PASTEMAC(ch,varname)( \
if ( j == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. */ \
bli_auxinfo_set_next_a( a2, aux ); \
bli_auxinfo_set_next_b( b2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -295,7 +305,7 @@ void PASTEMAC(ch,varname)( \
b1_i, \
beta_cast, \
c11, rs_c, cs_c, \
a2, b2 ); \
&aux ); \
} \
else \
{ \
@@ -311,7 +321,7 @@ void PASTEMAC(ch,varname)( \
b1_i, \
beta_cast, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Copy the result to the edge of C. */ \
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
@@ -334,6 +344,11 @@ void PASTEMAC(ch,varname)( \
if ( j == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. */ \
bli_auxinfo_set_next_a( a2, aux ); \
bli_auxinfo_set_next_b( b2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -345,7 +360,7 @@ void PASTEMAC(ch,varname)( \
b1, \
one, \
c11, rs_c, cs_c, \
a2, b2 ); \
&aux ); \
} \
else \
{ \
@@ -356,7 +371,7 @@ void PASTEMAC(ch,varname)( \
b1, \
zero, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Add the result to the edge of C. */ \
PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \

View File

@@ -163,6 +163,7 @@ void PASTEMAC(ch,varname)( \
inc_t rstep_a; \
inc_t cstep_b; \
inc_t rstep_c, cstep_c; \
auxinfo_t aux; \
\
/*
Assumptions/assertions:
@@ -233,6 +234,10 @@ void PASTEMAC(ch,varname)( \
\
rstep_c = rs_c * MR; \
cstep_c = cs_c * NR; \
\
/* Save the panel strides of A and B to the auxinfo_t object. */ \
bli_auxinfo_set_ps_a( ps_a, aux ); \
bli_auxinfo_set_ps_b( ps_b, aux ); \
\
b1 = b_cast; \
c1 = c_cast; \
@@ -284,6 +289,11 @@ void PASTEMAC(ch,varname)( \
if ( j == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. */ \
bli_auxinfo_set_next_a( a2, aux ); \
bli_auxinfo_set_next_b( b2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -295,7 +305,7 @@ void PASTEMAC(ch,varname)( \
b1, \
beta_cast, \
c11, rs_c, cs_c, \
a2, b2 ); \
&aux ); \
} \
else \
{ \
@@ -311,7 +321,7 @@ void PASTEMAC(ch,varname)( \
b1, \
beta_cast, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Copy the result to the edge of C. */ \
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
@@ -341,6 +351,11 @@ void PASTEMAC(ch,varname)( \
if ( j == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. */ \
bli_auxinfo_set_next_a( a2, aux ); \
bli_auxinfo_set_next_b( b2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -352,7 +367,7 @@ void PASTEMAC(ch,varname)( \
b1, \
one, \
c11, rs_c, cs_c, \
a2, b2 ); \
&aux ); \
} \
else \
{ \
@@ -363,7 +378,7 @@ void PASTEMAC(ch,varname)( \
b1, \
zero, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Add the result to the edge of C. */ \
PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \

View File

@@ -163,6 +163,7 @@ void PASTEMAC(ch,varname)( \
inc_t rstep_a; \
inc_t cstep_b; \
inc_t rstep_c, cstep_c; \
auxinfo_t aux; \
\
/*
Assumptions/assertions:
@@ -234,6 +235,10 @@ void PASTEMAC(ch,varname)( \
\
rstep_c = rs_c * MR; \
cstep_c = cs_c * NR; \
\
/* Save the panel strides of A and B to the auxinfo_t object. */ \
bli_auxinfo_set_ps_a( ps_a, aux ); \
bli_auxinfo_set_ps_b( ps_b, aux ); \
\
b1 = b_cast; \
c1 = c_cast; \
@@ -284,6 +289,11 @@ void PASTEMAC(ch,varname)( \
if ( j == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. */ \
bli_auxinfo_set_next_a( a2, aux ); \
bli_auxinfo_set_next_b( b2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -295,7 +305,7 @@ void PASTEMAC(ch,varname)( \
b1, \
beta_cast, \
c11, rs_c, cs_c, \
a2, b2 ); \
&aux ); \
} \
else \
{ \
@@ -311,7 +321,7 @@ void PASTEMAC(ch,varname)( \
b1, \
beta_cast, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Copy the result to the edge of C. */ \
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
@@ -341,6 +351,11 @@ void PASTEMAC(ch,varname)( \
if ( j == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. */ \
bli_auxinfo_set_next_a( a2, aux ); \
bli_auxinfo_set_next_b( b2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -352,7 +367,7 @@ void PASTEMAC(ch,varname)( \
b1, \
one, \
c11, rs_c, cs_c, \
a2, b2 ); \
&aux ); \
} \
else \
{ \
@@ -363,7 +378,7 @@ void PASTEMAC(ch,varname)( \
b1, \
zero, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Add the result to the edge of C. */ \
PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \

View File

@@ -151,6 +151,7 @@ void PASTEMAC(ch,varname)( \
dim_t rstep_a; \
dim_t cstep_b; \
dim_t rstep_c, cstep_c; \
auxinfo_t aux; \
\
/*
Assumptions/assertions:
@@ -223,6 +224,10 @@ void PASTEMAC(ch,varname)( \
\
rstep_c = rs_c * MR; \
cstep_c = cs_c * NR; \
\
/* Save the panel strides of A and B to the auxinfo_t object. */ \
bli_auxinfo_set_ps_a( ps_a, aux ); \
bli_auxinfo_set_ps_b( ps_b, aux ); \
\
b1 = b_cast; \
c1 = c_cast; \
@@ -288,6 +293,11 @@ void PASTEMAC(ch,varname)( \
if ( j == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. */ \
bli_auxinfo_set_next_a( a2, aux ); \
bli_auxinfo_set_next_b( b2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -300,7 +310,7 @@ void PASTEMAC(ch,varname)( \
b01, \
b11, \
c11, rs_c, cs_c, \
a2, b2 ); \
&aux ); \
} \
else \
{ \
@@ -312,7 +322,7 @@ void PASTEMAC(ch,varname)( \
b01, \
b11, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Copy the result to the bottom edge of C. */ \
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
@@ -335,6 +345,11 @@ void PASTEMAC(ch,varname)( \
if ( j == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. */ \
bli_auxinfo_set_next_a( a2, aux ); \
bli_auxinfo_set_next_b( b2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -346,7 +361,7 @@ void PASTEMAC(ch,varname)( \
b1, \
alpha_cast, \
c11, rs_c, cs_c, \
a2, b2 ); \
&aux ); \
} \
else \
{ \
@@ -357,7 +372,7 @@ void PASTEMAC(ch,varname)( \
b1, \
zero, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Add the result to the edge of C. */ \
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \

View File

@@ -152,6 +152,7 @@ void PASTEMAC(ch,varname)( \
dim_t rstep_a; \
dim_t cstep_b; \
dim_t rstep_c, cstep_c; \
auxinfo_t aux; \
\
/*
Assumptions/assertions:
@@ -232,6 +233,10 @@ void PASTEMAC(ch,varname)( \
\
rstep_c = rs_c * MR; \
cstep_c = cs_c * NR; \
\
/* Save the panel strides of A and B to the auxinfo_t object. */ \
bli_auxinfo_set_ps_a( ps_a, aux ); \
bli_auxinfo_set_ps_b( ps_b, aux ); \
\
b1 = b_cast; \
c1 = c_cast; \
@@ -299,6 +304,11 @@ void PASTEMAC(ch,varname)( \
if ( j == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. */ \
bli_auxinfo_set_next_a( a2, aux ); \
bli_auxinfo_set_next_b( b2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -311,7 +321,7 @@ void PASTEMAC(ch,varname)( \
b21, \
b11, \
c11, rs_c, cs_c, \
a2, b2 ); \
&aux ); \
} \
else \
{ \
@@ -323,7 +333,7 @@ void PASTEMAC(ch,varname)( \
b21, \
b11, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Copy the result to the bottom edge of C. */ \
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
@@ -346,6 +356,11 @@ void PASTEMAC(ch,varname)( \
if ( j == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. */ \
bli_auxinfo_set_next_a( a2, aux ); \
bli_auxinfo_set_next_b( b2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -357,7 +372,7 @@ void PASTEMAC(ch,varname)( \
b1, \
alpha_cast, \
c11, rs_c, cs_c, \
a2, b2 ); \
&aux ); \
} \
else \
{ \
@@ -368,7 +383,7 @@ void PASTEMAC(ch,varname)( \
b1, \
zero, \
ct, rs_ct, cs_ct, \
a2, b2 ); \
&aux ); \
\
/* Add the result to the edge of C. */ \
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \

View File

@@ -152,6 +152,7 @@ void PASTEMAC(ch,varname)( \
dim_t rstep_a; \
dim_t cstep_b; \
dim_t rstep_c, cstep_c; \
auxinfo_t aux; \
\
/*
Assumptions/assertions:
@@ -237,6 +238,12 @@ void PASTEMAC(ch,varname)( \
\
rstep_c = rs_c * MR; \
cstep_c = cs_c * NR; \
\
/* Save the panel strides of A and B to the auxinfo_t object.
NOTE: We swap the values for A and B since the triangular
"A" matrix is actually contained within B. */ \
bli_auxinfo_set_ps_a( ps_b, aux ); \
bli_auxinfo_set_ps_b( ps_a, aux ); \
\
b1 = b_cast; \
c1 = c_cast; \
@@ -302,6 +309,12 @@ void PASTEMAC(ch,varname)( \
if ( jb == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. NOTE: We swap the values for A and B since the
triangular "A" matrix is actually contained within B. */ \
bli_auxinfo_set_next_a( b2, aux ); \
bli_auxinfo_set_next_b( a2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -314,7 +327,7 @@ void PASTEMAC(ch,varname)( \
a12, \
a11, \
c11, cs_c, rs_c, \
b2, a2 ); \
&aux ); \
} \
else \
{ \
@@ -326,7 +339,7 @@ void PASTEMAC(ch,varname)( \
a12, \
a11, \
ct, cs_ct, rs_ct, \
b2, a2 ); \
&aux ); \
\
/* Copy the result to the bottom edge of C. */ \
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
@@ -356,6 +369,12 @@ void PASTEMAC(ch,varname)( \
if ( jb == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. NOTE: We swap the values for A and B since the
triangular "A" matrix is actually contained within B. */ \
bli_auxinfo_set_next_a( b2, aux ); \
bli_auxinfo_set_next_b( a2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -367,7 +386,7 @@ void PASTEMAC(ch,varname)( \
a1, \
alpha_cast, \
c11, cs_c, rs_c, \
b2, a2 ); \
&aux ); \
} \
else \
{ \
@@ -378,7 +397,7 @@ void PASTEMAC(ch,varname)( \
a1, \
zero, \
ct, cs_ct, rs_ct, \
b2, a2 ); \
&aux ); \
\
/* Add the result to the edge of C. */ \
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \

View File

@@ -151,6 +151,7 @@ void PASTEMAC(ch,varname)( \
dim_t rstep_a; \
dim_t cstep_b; \
dim_t rstep_c, cstep_c; \
auxinfo_t aux; \
\
/*
Assumptions/assertions:
@@ -232,6 +233,12 @@ void PASTEMAC(ch,varname)( \
\
rstep_c = rs_c * MR; \
cstep_c = cs_c * NR; \
\
/* Save the panel strides of A and B to the auxinfo_t object.
NOTE: We swap the values for A and B since the triangular
"A" matrix is actually contained within B. */ \
bli_auxinfo_set_ps_a( ps_b, aux ); \
bli_auxinfo_set_ps_b( ps_a, aux ); \
\
b1 = b_cast; \
c1 = c_cast; \
@@ -296,6 +303,12 @@ void PASTEMAC(ch,varname)( \
if ( j == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. NOTE: We swap the values for A and B since the
triangular "A" matrix is actually contained within B. */ \
bli_auxinfo_set_next_a( b2, aux ); \
bli_auxinfo_set_next_b( a2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -308,7 +321,7 @@ void PASTEMAC(ch,varname)( \
a10, \
a11, \
c11, cs_c, rs_c, \
b2, a2 ); \
&aux ); \
} \
else \
{ \
@@ -320,7 +333,7 @@ void PASTEMAC(ch,varname)( \
a10, \
a11, \
ct, cs_ct, rs_ct, \
b2, a2 ); \
&aux ); \
\
/* Copy the result to the bottom edge of C. */ \
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
@@ -350,6 +363,12 @@ void PASTEMAC(ch,varname)( \
if ( j == n_iter - 1 ) \
b2 = b_cast; \
} \
\
/* Save addresses of next panels of A and B to the auxinfo_t
object. NOTE: We swap the values for A and B since the
triangular "A" matrix is actually contained within B. */ \
bli_auxinfo_set_next_a( b2, aux ); \
bli_auxinfo_set_next_b( a2, aux ); \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \
@@ -361,7 +380,7 @@ void PASTEMAC(ch,varname)( \
a1, \
alpha_cast, \
c11, cs_c, rs_c, \
b2, a2 ); \
&aux ); \
} \
else \
{ \
@@ -372,7 +391,7 @@ void PASTEMAC(ch,varname)( \
a1, \
zero, \
ct, cs_ct, rs_ct, \
b2, a2 ); \
&aux ); \
\
/* Add the result to the edge of C. */ \
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \

View File

@@ -46,8 +46,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict b01, \
ctype* restrict b11, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
) \
{ \
const inc_t rs_b = PASTEMAC(ch,packnr); \
@@ -62,14 +61,14 @@ void PASTEMAC(ch,varname)( \
b01, \
alpha, \
b11, rs_b, cs_b, \
a_next, \
b_next ); \
data ); \
\
/* b11 = inv(a11) * b11;
c11 = b11; */ \
PASTEMAC(ch,trsmukr)( a11, \
b11, \
c11, rs_c, cs_c ); \
c11, rs_c, cs_c, \
data ); \
}
INSERT_GENTFUNC_BASIC2( gemmtrsm_l_ref_mxn, GEMM_UKERNEL, TRSM_L_UKERNEL )

View File

@@ -47,8 +47,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict b01, \
ctype* restrict b11, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( gemmtrsm_l_ref_mxn )

View File

@@ -46,8 +46,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict b21, \
ctype* restrict b11, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
) \
{ \
const inc_t rs_b = PASTEMAC(ch,packnr); \
@@ -62,14 +61,14 @@ void PASTEMAC(ch,varname)( \
b21, \
alpha, \
b11, rs_b, cs_b, \
a_next, \
b_next ); \
data ); \
\
/* b11 = inv(a11) * b11;
c11 = b11; */ \
PASTEMAC(ch,trsmukr)( a11, \
b11, \
c11, rs_c, cs_c ); \
c11, rs_c, cs_c, \
data ); \
}
INSERT_GENTFUNC_BASIC2( gemmtrsm_u_ref_mxn, GEMM_UKERNEL, TRSM_U_UKERNEL )

View File

@@ -47,8 +47,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict b21, \
ctype* restrict b11, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( gemmtrsm_u_ref_mxn )

View File

@@ -41,7 +41,8 @@
void PASTEMAC(ch,varname)( \
ctype* restrict a, \
ctype* restrict b, \
ctype* restrict c, inc_t rs_c, inc_t cs_c \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
) \
{ \
const dim_t m = PASTEMAC(ch,mr); \

View File

@@ -42,7 +42,8 @@
void PASTEMAC(ch,varname)( \
ctype* restrict a, \
ctype* restrict b, \
ctype* restrict c, inc_t rs_c, inc_t cs_c \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( trsm_l_ref_mxn )

View File

@@ -41,7 +41,8 @@
void PASTEMAC(ch,varname)( \
ctype* restrict a, \
ctype* restrict b, \
ctype* restrict c, inc_t rs_c, inc_t cs_c \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
) \
{ \
const dim_t m = PASTEMAC(ch,mr); \

View File

@@ -42,7 +42,8 @@
void PASTEMAC(ch,varname)( \
ctype* restrict a, \
ctype* restrict b, \
ctype* restrict c, inc_t rs_c, inc_t cs_c \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( trsm_u_ref_mxn )

View File

@@ -0,0 +1,58 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2013, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_AUXINFO_MACRO_DEFS_H
#define BLIS_AUXINFO_MACRO_DEFS_H

// Accessor macros for the fields of an auxinfo_t (a_next, b_next, ps_a,
// ps_b).
//
// NOTE: The two families below take their auxinfo argument differently:
// - the query macros expect a POINTER to an auxinfo_t (they use "->"),
//   e.g. bli_auxinfo_next_a( data ) where data is an auxinfo_t*;
// - the modification macros expect the auxinfo_t OBJECT itself (they use
//   "."), e.g. bli_auxinfo_set_next_a( a2, aux ) where aux is an auxinfo_t.

// auxinfo_t field query
// Each macro expands to a parenthesized expression naming the field.

#define bli_auxinfo_next_a( auxinfo ) ( (auxinfo)->a_next )
#define bli_auxinfo_next_b( auxinfo ) ( (auxinfo)->b_next )

#define bli_auxinfo_ps_a( auxinfo ) ( (auxinfo)->ps_a )
#define bli_auxinfo_ps_b( auxinfo ) ( (auxinfo)->ps_b )

// auxinfo_t field modification
// Each setter is wrapped in do { ... } while ( 0 ) so that it expands to
// exactly one statement that is completed by the caller's trailing
// semicolon. A bare { ... } block followed by ';' would terminate an
// enclosing unbraced "if" and orphan its "else". The value argument is
// parenthesized so it is always assigned as a single expression.

#define bli_auxinfo_set_next_a( a_p, auxinfo ) \
	do { (auxinfo).a_next = (a_p); } while ( 0 )
#define bli_auxinfo_set_next_b( b_p, auxinfo ) \
	do { (auxinfo).b_next = (b_p); } while ( 0 )

#define bli_auxinfo_set_ps_a( ps, auxinfo ) \
	do { (auxinfo).ps_a = (ps); } while ( 0 )
#define bli_auxinfo_set_ps_b( ps, auxinfo ) \
	do { (auxinfo).ps_b = (ps); } while ( 0 )

#endif

View File

@@ -91,6 +91,7 @@
#include "bli_scalar_macro_defs.h"
#include "bli_error_macro_defs.h"
#include "bli_blas_macro_defs.h"
#include "bli_auxinfo_macro_defs.h"
#endif

View File

@@ -63,32 +63,32 @@
#define bli_pool_set_block_ptrs( block_ptrs0, pool_p ) \
{ \
pool_p->block_ptrs = block_ptrs0; \
(pool_p)->block_ptrs = block_ptrs0; \
}
#define bli_pool_set_num_blocks( num_blocks0, pool_p ) \
{ \
pool_p->num_blocks = num_blocks0; \
(pool_p)->num_blocks = num_blocks0; \
}
#define bli_pool_set_block_size( block_size0, pool_p ) \
{ \
pool_p->block_size = block_size0; \
(pool_p)->block_size = block_size0; \
}
#define bli_pool_set_top_index( top_index0, pool_p ) \
{ \
pool_p->top_index = top_index0; \
(pool_p)->top_index = top_index0; \
}
#define bli_pool_dec_top_index( pool_p ) \
{ \
(pool_p->top_index)--; \
((pool_p)->top_index)--; \
}
#define bli_pool_inc_top_index( pool_p ) \
{ \
(pool_p->top_index)++; \
((pool_p)->top_index)++; \
}
#define bli_pool_init( num_blocks, block_size, block_ptrs, pool_p ) \

View File

@@ -167,6 +167,26 @@ typedef double f77_double;
typedef scomplex f77_scomplex;
typedef dcomplex f77_dcomplex;
// -- Auxiliary kernel info type --
// Note: This struct is used by macro-kernels to package together extra
// parameter values that may be of use to the micro-kernel without
// cluttering up the micro-kernel interface itself. A macro-kernel declares
// one auxinfo_t, fills it in with the bli_auxinfo_set_*() macros, and
// passes its address to each micro-kernel call. Micro-kernels read the
// fields back through the bli_auxinfo_*() query macros defined in
// bli_auxinfo_macro_defs.h.
typedef struct
{
// Pointers to the micro-panels of A and B which will be used by the
// next call to the micro-kernel. Macro-kernels update these before each
// micro-kernel invocation. Macro-kernels in which the triangular "A"
// matrix is actually contained within B store the two values swapped.
void* a_next;
void* b_next;
// The panel strides of A and B. Macro-kernels set these once, before
// entering their loops (and swap them in the same case noted above).
inc_t ps_a;
inc_t ps_b;
} auxinfo_t;
//
// -- BLIS info bit field masks ------------------------------------------------

View File

@@ -35,16 +35,18 @@
#include "blis.h"
void bli_sgemm_opt_4x4(
dim_t k,
float* restrict alpha,
float* restrict a,
float* restrict b,
float* restrict beta,
float* restrict c, inc_t rs_c, inc_t cs_c,
float* restrict a_next,
float* restrict b_next
dim_t k,
float* restrict alpha,
float* restrict a,
float* restrict b,
float* restrict beta,
float* restrict c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
void* a_next = bli_auxinfo_next_a( data );
void* b_next = bli_auxinfo_next_b( data );
float32x4_t alphav;
alphav = vmovq_n_f32( *alpha );
@@ -262,16 +264,18 @@ void bli_sgemm_opt_4x4(
}
void bli_dgemm_opt_4x4(
dim_t k,
double* restrict alpha,
double* restrict a,
double* restrict b,
double* restrict beta,
double* restrict c, inc_t rs_c, inc_t cs_c,
double* restrict a_next,
double* restrict b_next
dim_t k,
double* restrict alpha,
double* restrict a,
double* restrict b,
double* restrict beta,
double* restrict c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
//void* a_next = bli_auxinfo_next_a( data );
//void* b_next = bli_auxinfo_next_b( data );
//dim_t k_iter;
dim_t k_left;
@@ -515,8 +519,7 @@ void bli_cgemm_opt_4x4(
scomplex* restrict b,
scomplex* restrict beta,
scomplex* restrict c, inc_t rs_c, inc_t cs_c,
scomplex* restrict a_next,
scomplex* restrict b_next
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -526,8 +529,7 @@ void bli_cgemm_opt_4x4(
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}
void bli_zgemm_opt_4x4(
@@ -537,8 +539,7 @@ void bli_zgemm_opt_4x4(
dcomplex* restrict b,
dcomplex* restrict beta,
dcomplex* restrict c, inc_t rs_c, inc_t cs_c,
dcomplex* restrict a_next,
dcomplex* restrict b_next
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -548,7 +549,6 @@ void bli_zgemm_opt_4x4(
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}

View File

@@ -45,8 +45,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict b, \
ctype* restrict beta, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( gemm_opt_4x4 )

View File

@@ -36,14 +36,14 @@
#undef restrict
void bli_sgemm_8x8(
dim_t k,
float* alpha,
float* a,
float* b,
float* beta,
float* c, inc_t rs_c, inc_t cs_c,
float* a_next, float* b_next
)
dim_t k,
float* alpha,
float* a,
float* b,
float* beta,
float* c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
/* Just call the reference implementation. */
bli_sgemm_ref_mxn( k,
@@ -52,8 +52,7 @@ void bli_sgemm_8x8(
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}
@@ -76,14 +75,14 @@ void bli_sgemm_8x8(
*/
void bli_dgemm_8x8(
dim_t k,
restrict double* alpha,
restrict double* a,
restrict double* b,
restrict double* beta,
restrict double* c, inc_t rs_c, inc_t cs_c,
restrict double* a_next, restrict double* b_next
)
dim_t k,
double* alpha,
double* a,
double* b,
double* beta,
double* c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
//Registers for storing C.
@@ -224,33 +223,34 @@ void bli_dgemm_8x8(
}
void bli_dgemm_8x8_mt(
dim_t k,
restrict double* alpha,
restrict double* a,
restrict double* b,
restrict double* beta,
restrict double* c, inc_t rs_c, inc_t cs_c,
restrict double* a_next, restrict double* b_next,
dim_t tid
)
dim_t k,
double* alpha,
double* a,
double* b,
double* beta,
double* c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data,
dim_t tid
)
{
bli_dgemm_8x8( k, alpha,
a,
b, beta,
c,
rs_c, cs_c, NULL, NULL );
bli_dgemm_8x8( k,
alpha,
a,
b, beta,
c,
rs_c, cs_c,
data );
}
void bli_cgemm_8x8(
dim_t k,
scomplex* alpha,
scomplex* a,
scomplex* b,
scomplex* beta,
scomplex* c, inc_t rs_c, inc_t cs_c,
scomplex* a_next, scomplex* b_next
)
dim_t k,
scomplex* alpha,
scomplex* a,
scomplex* b,
scomplex* beta,
scomplex* c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
/* Just call the reference implementation. */
bli_cgemm_ref_mxn( k,
@@ -259,19 +259,18 @@ void bli_cgemm_8x8(
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}
void bli_zgemm_8x8(
dim_t k,
dcomplex* alpha,
dcomplex* a,
dcomplex* b,
dcomplex* beta,
dcomplex* c, inc_t rs_c, inc_t cs_c,
dcomplex* a_next, dcomplex* b_next
)
dim_t k,
dcomplex* alpha,
dcomplex* a,
dcomplex* b,
dcomplex* beta,
dcomplex* c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
/* Just call the reference implementation. */
bli_zgemm_ref_mxn( k,
@@ -280,21 +279,20 @@ void bli_zgemm_8x8(
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}
void bli_sgemm_8x8_mt(
dim_t k,
float* alpha,
float* a,
float* b,
float* beta,
float* c, inc_t rs_c, inc_t cs_c,
float* a_next, float* b_next,
int t_id
)
dim_t k,
float* alpha,
float* a,
float* b,
float* beta,
float* c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data,
dim_t t_id
)
{
/* Just call the reference implementation. */
bli_sgemm_ref_mxn( k,
@@ -303,20 +301,19 @@ void bli_sgemm_8x8_mt(
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}
void bli_cgemm_8x8_mt(
dim_t k,
scomplex* alpha,
scomplex* a,
scomplex* b,
scomplex* beta,
scomplex* c, inc_t rs_c, inc_t cs_c,
scomplex* a_next, scomplex* b_next,
int t_id
)
dim_t k,
scomplex* alpha,
scomplex* a,
scomplex* b,
scomplex* beta,
scomplex* c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data,
dim_t t_id
)
{
/* Just call the reference implementation. */
bli_cgemm_ref_mxn( k,
@@ -325,20 +322,19 @@ void bli_cgemm_8x8_mt(
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}
void bli_zgemm_8x8_mt(
dim_t k,
dcomplex* alpha,
dcomplex* a,
dcomplex* b,
dcomplex* beta,
dcomplex* c, inc_t rs_c, inc_t cs_c,
dcomplex* a_next, dcomplex* b_next,
int t_id
)
dim_t k,
dcomplex* alpha,
dcomplex* a,
dcomplex* b,
dcomplex* beta,
dcomplex* c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data,
dim_t t_id
)
{
/* Just call the reference implementation. */
bli_zgemm_ref_mxn( k,
@@ -347,6 +343,5 @@ void bli_zgemm_8x8_mt(
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}

View File

@@ -39,29 +39,30 @@
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname)( \
dim_t k, \
ctype* alpha, \
ctype* a, \
ctype* b, \
ctype* beta, \
ctype* c, inc_t rs_c, inc_t cs_c, \
ctype* a_next, ctype* b_next \
dim_t k, \
ctype* alpha, \
ctype* a, \
ctype* b, \
ctype* beta, \
ctype* c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( gemm_8x8 )
#undef GENTPROT
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname)( \
dim_t k, \
ctype* alpha, \
ctype* a, \
ctype* b, \
ctype* beta, \
ctype* c, inc_t rs_c, inc_t cs_c, \
ctype* a_next, ctype* b_next, \
dim_t tid \
dim_t k, \
ctype* alpha, \
ctype* a, \
ctype* b, \
ctype* beta, \
ctype* c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data, \
dim_t tid \
);
INSERT_GENTPROT_BASIC( gemm_8x8_mt )

View File

@@ -45,8 +45,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict b, \
ctype* restrict beta, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
) \
{ \
ctype a0; \

View File

@@ -43,8 +43,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict b, \
ctype* restrict beta, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( gemm_ref_4x4 )

View File

@@ -46,8 +46,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict bT, \
ctype* restrict b, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
) \
{ \
ctype* minus_one = PASTEMAC(ch,m1); \
@@ -61,12 +60,12 @@ void PASTEMAC(ch,varname)( \
bT, \
alpha, \
b, rs_b, cs_b, \
a_next, \
b_next ); \
data ); \
\
PASTEMAC(ch,trsmukr)( a, \
b, \
c, rs_c, cs_c ); \
c, rs_c, cs_c, \
data ); \
}
INSERT_GENTFUNC_BASIC2( gemmtrsm_l_ref_4x4, GEMM_UKERNEL, TRSM_L_UKERNEL )

View File

@@ -44,8 +44,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict bT, \
ctype* restrict b, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( gemmtrsm_l_ref_4x4 )

View File

@@ -46,8 +46,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict bB, \
ctype* restrict b, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
) \
{ \
ctype* minus_one = PASTEMAC(ch,m1); \
@@ -61,11 +60,12 @@ void PASTEMAC(ch,varname)( \
bB, \
alpha, \
b, rs_b, cs_b, \
a_next, b_next ); \
data ); \
\
PASTEMAC(ch,trsmukr)( a, \
b, \
c, rs_c, cs_c ); \
c, rs_c, cs_c, \
data ); \
}
INSERT_GENTFUNC_BASIC2( gemmtrsm_u_ref_4x4, GEMM_UKERNEL, TRSM_U_UKERNEL )

View File

@@ -44,8 +44,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict bB, \
ctype* restrict b, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( gemmtrsm_u_ref_4x4 )

View File

@@ -41,7 +41,8 @@
void PASTEMAC(ch,varname)( \
ctype* restrict a, \
ctype* restrict b, \
ctype* restrict c, inc_t rs_c, inc_t cs_c \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
) \
{ \
const dim_t rs_a = 1; \

View File

@@ -39,7 +39,8 @@
void PASTEMAC(ch,varname)( \
ctype* restrict a, \
ctype* restrict b, \
ctype* restrict c, inc_t rs_c, inc_t cs_c \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( trsm_l_ref_4x4 )

View File

@@ -41,7 +41,8 @@
void PASTEMAC(ch,varname)( \
ctype* restrict a, \
ctype* restrict b, \
ctype* restrict c, inc_t rs_c, inc_t cs_c \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
) \
{ \
const dim_t rs_a = 1; \

View File

@@ -39,7 +39,8 @@
void PASTEMAC(ch,varname)( \
ctype* restrict a, \
ctype* restrict b, \
ctype* restrict c, inc_t rs_c, inc_t cs_c \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( trsm_u_ref_4x4 )

View File

@@ -41,9 +41,7 @@ void bli_sgemm_opt_d4x4(
float* restrict b,
float* restrict beta,
float* restrict c, inc_t rs_c, inc_t cs_c,
float* restrict a_next,
float* restrict b_next
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -53,8 +51,7 @@ void bli_sgemm_opt_d4x4(
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}
void bli_dgemm_opt_d4x4(
@@ -64,16 +61,11 @@ void bli_dgemm_opt_d4x4(
double* restrict b,
double* restrict beta,
double* restrict c, inc_t rs_c, inc_t cs_c,
double* restrict a_next,
double* restrict b_next
auxinfo_t* data
)
{
dim_t k_iter;
dim_t k_left;
k_iter = k / 4;
k_left = k % 4;
dim_t k_iter = k / 4;
dim_t k_left = k % 4;
__asm__ volatile
(
@@ -551,8 +543,7 @@ void bli_cgemm_opt_d4x4(
scomplex* restrict b,
scomplex* restrict beta,
scomplex* restrict c, inc_t rs_c, inc_t cs_c,
scomplex* restrict a_next,
scomplex* restrict b_next
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -562,8 +553,7 @@ void bli_cgemm_opt_d4x4(
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}
void bli_zgemm_opt_d4x4(
@@ -573,8 +563,7 @@ void bli_zgemm_opt_d4x4(
dcomplex* restrict b,
dcomplex* restrict beta,
dcomplex* restrict c, inc_t rs_c, inc_t cs_c,
dcomplex* restrict a_next,
dcomplex* restrict b_next
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -584,7 +573,6 @@ void bli_zgemm_opt_d4x4(
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}

View File

@@ -43,8 +43,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict b, \
ctype* restrict beta, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( gemm_opt_d4x4 )

View File

@@ -36,15 +36,15 @@
#include <assert.h>
void bli_sgemm_opt_30x8(
dim_t k, float* alpha,
float* a,
float* b,
float* beta,
float* c, inc_t rs_c, inc_t cs_c,
float* a_next,
float* b_next,
dim_t thread_id
)
dim_t k,
float* alpha,
float* a,
float* b,
float* beta,
float* c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data,
dim_t thread_id
)
{
}
@@ -324,15 +324,15 @@ int offsets[16] __attribute__((aligned(0x1000))) = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9
//#define MONITORS
//#define LOOPMON
void bli_dgemm_opt_30x8(
dim_t k, double* alpha,
double* a,
double* b,
double* beta,
double* c, inc_t rs_c, inc_t cs_c,
double* a_next,
double* b_next,
dim_t thread_id
)
dim_t k,
double* alpha,
double* a,
double* b,
double* beta,
double* c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data,
dim_t thread_id
)
{
int * offsetPtr = &offsets[0];
@@ -627,30 +627,29 @@ void bli_dgemm_opt_30x8(
}
void bli_cgemm_opt_30x8(
dim_t k, scomplex* alpha,
scomplex* a,
scomplex* b,
scomplex* beta,
scomplex* c, inc_t rs_c, inc_t cs_c,
scomplex* a_next,
scomplex* b_next,
dim_t thread_id
)
dim_t k,
scomplex* alpha,
scomplex* a,
scomplex* b,
scomplex* beta,
scomplex* c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data,
dim_t thread_id
)
{
}
void bli_zgemm_opt_30x8(
dim_t k, dcomplex* alpha,
dcomplex* a,
dcomplex* b,
dcomplex* beta,
dcomplex* c, inc_t rs_c, inc_t cs_c,
dcomplex* a_next,
dcomplex* b_next,
dim_t thread_id
)
dim_t k,
dcomplex* alpha,
dcomplex* a,
dcomplex* b,
dcomplex* beta,
dcomplex* c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data,
dim_t thread_id
)
{
}

View File

@@ -39,14 +39,14 @@
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname)( \
dim_t k, \
ctype* alpha, \
ctype* a, \
ctype* b, \
ctype* beta, \
ctype* c, inc_t rs_c, inc_t cs_c, \
ctype* a_next, ctype* b_next, \
dim_t thread_id \
dim_t k, \
ctype* alpha, \
ctype* a, \
ctype* b, \
ctype* beta, \
ctype* c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data, \
dim_t thread_id \
);
INSERT_GENTPROT_BASIC( gemm_opt_30x8 )

View File

@@ -49,15 +49,14 @@
* b is k x nr in packed row-maj format (leading dim is nr)
*/
void bli_sgemm_opt_8x4(
dim_t k,
float* restrict alpha,
float* restrict a,
float* restrict b,
float* restrict beta,
float* restrict c, inc_t rs_c, inc_t cs_c,
float* restrict a_next,
float* restrict b_next
)
dim_t k,
float* restrict alpha,
float* restrict a,
float* restrict b,
float* restrict beta,
float* restrict c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
#if 0 || defined(UTEST)
const long MR = BLIS_DEFAULT_MR_S, NR = BLIS_DEFAULT_NR_S;
@@ -74,7 +73,7 @@ void bli_sgemm_opt_8x4(
}
}
#else
bli_sgemm_ref_mxn(k, alpha, a, b, beta, c, rs_c, cs_c, a_next, b_next);
bli_sgemm_ref_mxn(k, alpha, a, b, beta, c, rs_c, cs_c, data);
#endif
}
@@ -88,15 +87,14 @@ void bli_sgemm_opt_8x4(
* b is k x nr in packed row-maj format (leading dim is nr)
*/
void bli_dgemm_opt_8x4(
dim_t k,
double* restrict alpha,
double* restrict a,
double* restrict b,
double* restrict beta,
double* restrict c, inc_t rs_c, inc_t cs_c,
double* restrict a_next,
double* restrict b_next
)
dim_t k,
double* restrict alpha,
double* restrict a,
double* restrict b,
double* restrict beta,
double* restrict c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
#if 1
if (rs_c == 1) {
@@ -447,7 +445,7 @@ void bli_dgemm_opt_8x4(
}
}
#else
bli_dgemm_ref_mxn(k, alpha, a, b, beta, c, rs_c, cs_c, a_next, b_next);
bli_dgemm_ref_mxn(k, alpha, a, b, beta, c, rs_c, cs_c, data);
#endif
}
}
@@ -462,14 +460,13 @@ void bli_dgemm_opt_8x4(
* b is k x nr in packed row-maj format (leading dim is nr)
*/
void bli_cgemm_opt_8x4(
dim_t k,
scomplex* restrict alpha,
scomplex* restrict a,
scomplex* restrict b,
scomplex* restrict beta,
scomplex* restrict c, inc_t rs_c, inc_t cs_c,
scomplex* restrict a_next,
scomplex* restrict b_next
dim_t k,
scomplex* restrict alpha,
scomplex* restrict a,
scomplex* restrict b,
scomplex* restrict beta,
scomplex* restrict c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
#if 0 || defined(UTEST)
@@ -498,7 +495,7 @@ void bli_cgemm_opt_8x4(
}
}
#else
bli_cgemm_ref_mxn(k, alpha, a, b, beta, c, rs_c, cs_c, a_next, b_next);
bli_cgemm_ref_mxn(k, alpha, a, b, beta, c, rs_c, cs_c, data);
#endif
}
@@ -512,14 +509,13 @@ void bli_cgemm_opt_8x4(
* b is k x nr in packed row-maj format (leading dim is nr)
*/
void bli_zgemm_opt_8x4(
dim_t k,
dcomplex* restrict alpha,
dcomplex* restrict a,
dcomplex* restrict b,
dcomplex* restrict beta,
dcomplex* restrict c, inc_t rs_c, inc_t cs_c,
dcomplex* restrict a_next,
dcomplex* restrict b_next
dim_t k,
dcomplex* restrict alpha,
dcomplex* restrict a,
dcomplex* restrict b,
dcomplex* restrict beta,
dcomplex* restrict c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
#if 0 || defined(UTEST)
@@ -548,7 +544,7 @@ void bli_zgemm_opt_8x4(
}
}
#else
bli_zgemm_ref_mxn(k, alpha, a, b, beta, c, rs_c, cs_c, a_next, b_next);
bli_zgemm_ref_mxn(k, alpha, a, b, beta, c, rs_c, cs_c, data);
#endif
}

View File

@@ -42,25 +42,23 @@
#endif
void bli_sgemm_opt_8x4(
dim_t k,
float* restrict alpha,
float* restrict a,
float* restrict b,
float* restrict beta,
float* restrict c, inc_t rs_c, inc_t cs_c,
float* restrict a_next,
float* restrict b_next
dim_t k,
float* restrict alpha,
float* restrict a,
float* restrict b,
float* restrict beta,
float* restrict c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
);
void bli_dgemm_opt_8x4(
dim_t k,
double* restrict alpha,
double* restrict a,
double* restrict b,
double* restrict beta,
double* restrict c, inc_t rs_c, inc_t cs_c,
double* restrict a_next,
double* restrict b_next
dim_t k,
double* restrict alpha,
double* restrict a,
double* restrict b,
double* restrict beta,
double* restrict c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
);
void bli_cgemm_opt_8x4(
@@ -70,8 +68,7 @@ void bli_cgemm_opt_8x4(
scomplex* restrict b,
scomplex* restrict beta,
scomplex* restrict c, inc_t rs_c, inc_t cs_c,
scomplex* restrict a_next,
scomplex* restrict b_next
auxinfo_t* data
);
void bli_zgemm_opt_8x4(
@@ -81,8 +78,7 @@ void bli_zgemm_opt_8x4(
dcomplex* restrict b,
dcomplex* restrict beta,
dcomplex* restrict c, inc_t rs_c, inc_t cs_c,
dcomplex* restrict a_next,
dcomplex* restrict b_next
auxinfo_t* data
);
#endif

View File

@@ -29,15 +29,14 @@
* b is k x nr in packed row-maj format (leading dim is nr)
*/
void bli_dgemm_check(
dim_t k,
double* restrict alpha,
double* restrict a,
double* restrict b,
double* restrict beta,
double* restrict c, inc_t rs_c, inc_t cs_c,
double* restrict a_next,
double* restrict b_next
)
dim_t k,
double* restrict alpha,
double* restrict a,
double* restrict b,
double* restrict beta,
double* restrict c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
int i, j, kk;
double c00;
@@ -100,9 +99,9 @@ int main(int argc, char *argv[])
/* First check the results */
bli_dgemm_opt_8x4(k, &alpha, A, B, &beta, C, rs_c, cs_c, 0, 0);
bli_dgemm_opt_8x4(k, &alpha, A, B, &beta, C, rs_c, cs_c, NULL);
bli_dgemm_check(k, &alpha, A, B, &beta, C2, rs_c, cs_c, 0, 0);
bli_dgemm_check(k, &alpha, A, B, &beta, C2, rs_c, cs_c, NULL);
for (i=0, errors=0; i<MR*NR-1; i++) {
if (fabs(C[i] - C2[i]) > EPSILON) {
@@ -121,7 +120,7 @@ int main(int argc, char *argv[])
gettimeofday(&tv_start, NULL);
for (i=0; i<iters; i++) {
bli_dgemm_opt_8x4(k, &alpha, A, B, &beta, C, rs_c, cs_c, 0, 0);
bli_dgemm_opt_8x4(k, &alpha, A, B, &beta, C, rs_c, cs_c, NULL);
}
gettimeofday(&tv_end, NULL);

View File

@@ -39,13 +39,12 @@
void bli_sgemm_opt_8x4_ref_u4_nodupl_avx1(
dim_t k,
float* restrict alpha,
float* restrict a,
float* restrict b,
float* restrict beta,
float* restrict c, inc_t rs_c, inc_t cs_c,
float* restrict a_next,
float* restrict b_next
float* restrict alpha,
float* restrict a,
float* restrict b,
float* restrict beta,
float* restrict c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -55,30 +54,28 @@ void bli_sgemm_opt_8x4_ref_u4_nodupl_avx1(
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}
void bli_dgemm_opt_8x4_ref_u4_nodupl_avx1(
dim_t k,
double* restrict alpha,
double* restrict a,
double* restrict b,
double* restrict beta,
double* restrict c, inc_t rs_c, inc_t cs_c,
double* restrict a_next,
double* restrict b_next
dim_t k,
double* restrict alpha,
double* restrict a,
double* restrict b,
double* restrict beta,
double* restrict c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
dim_t i;
//void* a_next = bli_auxinfo_next_a( data );
void* b_next = bli_auxinfo_next_b( data );
dim_t k_iter;
dim_t k_left;
dim_t k_iter = k / 2;
dim_t k_left = k % 2;
k_iter = k / 2;
k_left = k % 2;
dim_t i;
double *c00, *c01, *c02, *c03;
double *c40, *c41, *c42, *c43;
@@ -641,8 +638,7 @@ void bli_cgemm_opt_8x4_ref_u4_nodupl_avx1(
scomplex* restrict b,
scomplex* restrict beta,
scomplex* restrict c, inc_t rs_c, inc_t cs_c,
scomplex* restrict a_next,
scomplex* restrict b_next
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -652,8 +648,7 @@ void bli_cgemm_opt_8x4_ref_u4_nodupl_avx1(
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}
@@ -665,8 +660,7 @@ void bli_zgemm_opt_8x4_ref_u4_nodupl_avx1(
dcomplex* restrict b,
dcomplex* restrict beta,
dcomplex* restrict c, inc_t rs_c, inc_t cs_c,
dcomplex* restrict a_next,
dcomplex* restrict b_next
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -676,7 +670,6 @@ void bli_zgemm_opt_8x4_ref_u4_nodupl_avx1(
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}

View File

@@ -43,8 +43,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict b, \
ctype* restrict beta, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( gemm_opt_8x4_ref_u4_nodupl_avx1 )

View File

@@ -41,15 +41,14 @@ void bli_sgemm_opt_d4x4(
float* restrict b,
float* restrict beta,
float* restrict c, inc_t rs_c, inc_t cs_c,
float* restrict a_next,
float* restrict b_next
auxinfo_t* data
)
{
dim_t k_iter;
dim_t k_left;
void* a_next = bli_auxinfo_next_a( data );
void* b_next = bli_auxinfo_next_b( data );
k_iter = k / 4;
k_left = k % 4;
dim_t k_iter = k / 4;
dim_t k_left = k % 4;
__asm__ volatile
(
@@ -723,15 +722,14 @@ void bli_dgemm_opt_d4x4(
double* restrict b,
double* restrict beta,
double* restrict c, inc_t rs_c, inc_t cs_c,
double* restrict a_next,
double* restrict b_next
auxinfo_t* data
)
{
dim_t k_iter;
dim_t k_left;
void* a_next = bli_auxinfo_next_a( data );
void* b_next = bli_auxinfo_next_b( data );
k_iter = k / 4;
k_left = k % 4;
dim_t k_iter = k / 4;
dim_t k_left = k % 4;
__asm__ volatile
(
@@ -1339,8 +1337,7 @@ void bli_cgemm_opt_d4x4(
scomplex* restrict b,
scomplex* restrict beta,
scomplex* restrict c, inc_t rs_c, inc_t cs_c,
scomplex* restrict a_next,
scomplex* restrict b_next
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -1350,8 +1347,7 @@ void bli_cgemm_opt_d4x4(
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}
void bli_zgemm_opt_d4x4(
@@ -1361,8 +1357,7 @@ void bli_zgemm_opt_d4x4(
dcomplex* restrict b,
dcomplex* restrict beta,
dcomplex* restrict c, inc_t rs_c, inc_t cs_c,
dcomplex* restrict a_next,
dcomplex* restrict b_next
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -1372,7 +1367,6 @@ void bli_zgemm_opt_d4x4(
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}

View File

@@ -43,8 +43,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict b, \
ctype* restrict beta, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( gemm_opt_d4x4 )

View File

@@ -42,8 +42,7 @@ void bli_sgemmtrsm_l_opt_d4x4(
float* restrict b01,
float* restrict b11,
float* restrict c11, inc_t rs_c, inc_t cs_c,
float* restrict a_next,
float* restrict b_next
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -54,8 +53,7 @@ void bli_sgemmtrsm_l_opt_d4x4(
b01,
b11,
c11, rs_c, cs_c,
a_next,
b_next );
data );
}
void bli_dgemmtrsm_l_opt_d4x4(
@@ -66,15 +64,13 @@ void bli_dgemmtrsm_l_opt_d4x4(
double* restrict b01,
double* restrict b11,
double* restrict c11, inc_t rs_c, inc_t cs_c,
double* restrict a_next,
double* restrict b_next
auxinfo_t* data
)
{
dim_t k_iter;
dim_t k_left;
void* b_next = bli_auxinfo_next_b( data );
k_iter = k / 4;
k_left = k % 4;
dim_t k_iter = k / 4;
dim_t k_left = k % 4;
__asm__ volatile
(
@@ -551,8 +547,7 @@ void bli_cgemmtrsm_l_opt_d4x4(
scomplex* restrict b01,
scomplex* restrict b11,
scomplex* restrict c11, inc_t rs_c, inc_t cs_c,
scomplex* restrict a_next,
scomplex* restrict b_next
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -563,8 +558,7 @@ void bli_cgemmtrsm_l_opt_d4x4(
b01,
b11,
c11, rs_c, cs_c,
a_next,
b_next );
data );
}
void bli_zgemmtrsm_l_opt_d4x4(
@@ -575,8 +569,7 @@ void bli_zgemmtrsm_l_opt_d4x4(
dcomplex* restrict b01,
dcomplex* restrict b11,
dcomplex* restrict c11, inc_t rs_c, inc_t cs_c,
dcomplex* restrict a_next,
dcomplex* restrict b_next
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -587,7 +580,6 @@ void bli_zgemmtrsm_l_opt_d4x4(
b01,
b11,
c11, rs_c, cs_c,
a_next,
b_next );
data );
}

View File

@@ -44,8 +44,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict b01, \
ctype* restrict b11, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( gemmtrsm_l_opt_d4x4 )

View File

@@ -42,8 +42,7 @@ void bli_sgemmtrsm_u_opt_d4x4(
float* restrict b21,
float* restrict b11,
float* restrict c11, inc_t rs_c, inc_t cs_c,
float* restrict a_next,
float* restrict b_next
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -54,8 +53,7 @@ void bli_sgemmtrsm_u_opt_d4x4(
b21,
b11,
c11, rs_c, cs_c,
a_next,
b_next );
data );
}
void bli_dgemmtrsm_u_opt_d4x4(
@@ -66,15 +64,14 @@ void bli_dgemmtrsm_u_opt_d4x4(
double* restrict b21,
double* restrict b11,
double* restrict c11, inc_t rs_c, inc_t cs_c,
double* restrict a_next,
double* restrict b_next
auxinfo_t* data
)
{
dim_t k_iter;
dim_t k_left;
void* b_next = bli_auxinfo_next_b( data );
dim_t k_iter = k / 4;
dim_t k_left = k % 4;
k_iter = k / 4;
k_left = k % 4;
__asm__ volatile
(
@@ -537,8 +534,7 @@ void bli_cgemmtrsm_u_opt_d4x4(
scomplex* restrict b21,
scomplex* restrict b11,
scomplex* restrict c11, inc_t rs_c, inc_t cs_c,
scomplex* restrict a_next,
scomplex* restrict b_next
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -549,8 +545,7 @@ void bli_cgemmtrsm_u_opt_d4x4(
b21,
b11,
c11, rs_c, cs_c,
a_next,
b_next );
data );
}
void bli_zgemmtrsm_u_opt_d4x4(
@@ -561,8 +556,7 @@ void bli_zgemmtrsm_u_opt_d4x4(
dcomplex* restrict b21,
dcomplex* restrict b11,
dcomplex* restrict c11, inc_t rs_c, inc_t cs_c,
dcomplex* restrict a_next,
dcomplex* restrict b_next
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -573,7 +567,6 @@ void bli_zgemmtrsm_u_opt_d4x4(
b21,
b11,
c11, rs_c, cs_c,
a_next,
b_next );
data );
}

View File

@@ -44,8 +44,7 @@ void PASTEMAC(ch,varname)( \
ctype* restrict b21, \
ctype* restrict b11, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
ctype* restrict a_next, \
ctype* restrict b_next \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( gemmtrsm_u_opt_d4x4 )

View File

@@ -37,19 +37,22 @@
void bli_strsm_l_opt_d4x4(
float* restrict a11,
float* restrict b11,
float* restrict c11, inc_t rs_c, inc_t cs_c
float* restrict c11, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
/* Just call the reference implementation. */
bli_strsm_l_ref_mxn( a11,
b11,
c11, rs_c, cs_c );
c11, rs_c, cs_c,
data );
}
void bli_dtrsm_l_opt_d4x4(
double* restrict a11,
double* restrict b11,
double* restrict c11, inc_t rs_c, inc_t cs_c
double* restrict c11, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
__asm__ volatile
@@ -208,24 +211,28 @@ void bli_dtrsm_l_opt_d4x4(
void bli_ctrsm_l_opt_d4x4(
scomplex* restrict a11,
scomplex* restrict b11,
scomplex* restrict c11, inc_t rs_c, inc_t cs_c
scomplex* restrict c11, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
/* Just call the reference implementation. */
bli_ctrsm_l_ref_mxn( a11,
b11,
c11, rs_c, cs_c );
c11, rs_c, cs_c,
data );
}
void bli_ztrsm_l_opt_d4x4(
dcomplex* restrict a11,
dcomplex* restrict b11,
dcomplex* restrict c11, inc_t rs_c, inc_t cs_c
dcomplex* restrict c11, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
/* Just call the reference implementation. */
bli_ztrsm_l_ref_mxn( a11,
b11,
c11, rs_c, cs_c );
c11, rs_c, cs_c,
data );
}

View File

@@ -39,7 +39,8 @@
void PASTEMAC(ch,varname)( \
ctype* restrict a11, \
ctype* restrict b11, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( trsm_l_opt_d4x4 )

View File

@@ -37,19 +37,22 @@
void bli_strsm_u_opt_d4x4(
float* restrict a11,
float* restrict b11,
float* restrict c11, inc_t rs_c, inc_t cs_c
float* restrict c11, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
/* Just call the reference implementation. */
bli_strsm_u_ref_mxn( a11,
b11,
c11, rs_c, cs_c );
c11, rs_c, cs_c,
data );
}
void bli_dtrsm_u_opt_d4x4(
double* restrict a11,
double* restrict b11,
double* restrict c11, inc_t rs_c, inc_t cs_c
double* restrict c11, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
__asm__ volatile
@@ -211,24 +214,28 @@ void bli_dtrsm_u_opt_d4x4(
void bli_ctrsm_u_opt_d4x4(
scomplex* restrict a11,
scomplex* restrict b11,
scomplex* restrict c11, inc_t rs_c, inc_t cs_c
scomplex* restrict c11, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
/* Just call the reference implementation. */
bli_ctrsm_u_ref_mxn( a11,
b11,
c11, rs_c, cs_c );
c11, rs_c, cs_c,
data );
}
void bli_ztrsm_u_opt_d4x4(
dcomplex* restrict a11,
dcomplex* restrict b11,
dcomplex* restrict c11, inc_t rs_c, inc_t cs_c
dcomplex* restrict c11, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
/* Just call the reference implementation. */
bli_ztrsm_u_ref_mxn( a11,
b11,
c11, rs_c, cs_c );
c11, rs_c, cs_c,
data );
}

View File

@@ -39,7 +39,8 @@
void PASTEMAC(ch,varname)( \
ctype* restrict a11, \
ctype* restrict b11, \
ctype* restrict c11, inc_t rs_c, inc_t cs_c \
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( trsm_u_opt_d4x4 )

View File

@@ -36,14 +36,14 @@
void bli_sgemm_4x6(
dim_t k,
float* alpha,
float* a,
float* b,
float* beta,
float* c, inc_t rs_c, inc_t cs_c,
float* a_next, float* b_next
void bli_sgemm_4x6(
dim_t k,
float* restrict alpha,
float* restrict a,
float* restrict b,
float* restrict beta,
float* restrict c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -53,24 +53,21 @@
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}
void bli_dgemm_4x6(
dim_t k,
double* alpha,
double* a,
double* b,
double* beta,
double* c, inc_t rs_c, inc_t cs_c,
double* a_next, double* b_next
void bli_dgemm_4x6(
dim_t k,
double* restrict alpha,
double* restrict a,
double* restrict b,
double* restrict beta,
double* restrict c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
dim_t k_iter;
dim_t k_left;
k_iter = k / 16;
k_left = k % 16;
dim_t k_iter = k / 16;
dim_t k_left = k % 16;
__asm__
(
@@ -674,14 +671,14 @@
);
}
void bli_cgemm_4x6(
dim_t k,
scomplex* alpha,
scomplex* a,
scomplex* b,
scomplex* beta,
scomplex* c, inc_t rs_c, inc_t cs_c,
scomplex* a_next, scomplex* b_next
void bli_cgemm_4x6(
dim_t k,
scomplex* restrict alpha,
scomplex* restrict a,
scomplex* restrict b,
scomplex* restrict beta,
scomplex* restrict c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -691,18 +688,17 @@
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}
void bli_zgemm_4x6(
dim_t k,
dcomplex* alpha,
dcomplex* a,
dcomplex* b,
dcomplex* beta,
dcomplex* c, inc_t rs_c, inc_t cs_c,
dcomplex* a_next, dcomplex* b_next
void bli_zgemm_4x6(
dim_t k,
dcomplex* restrict alpha,
dcomplex* restrict a,
dcomplex* restrict b,
dcomplex* restrict beta,
dcomplex* restrict c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
)
{
/* Just call the reference implementation. */
@@ -712,7 +708,6 @@
b,
beta,
c, rs_c, cs_c,
a_next,
b_next );
data );
}

View File

@@ -39,13 +39,13 @@
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname)( \
dim_t k, \
ctype* alpha, \
ctype* a, \
ctype* b, \
ctype* beta, \
ctype* c, inc_t rs_c, inc_t cs_c, \
ctype* a_next, ctype* b_next \
dim_t k, \
ctype* restrict alpha, \
ctype* restrict a, \
ctype* restrict b, \
ctype* restrict beta, \
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( gemm_4x6 )

View File

@@ -119,7 +119,7 @@ endif
# BLIS library and header path. This is simply wherever it was installed.
BLIS_LIB_PATH := $(INSTALL_PREFIX)/lib
BLIS_INC_PATH := $(INSTALL_PREFIX)/include
BLIS_INC_PATH := $(INSTALL_PREFIX)/include/blis
# BLIS library.
BLIS_LIB := $(BLIS_LIB_PATH)/libblis.a
@@ -174,10 +174,8 @@ TEST_SIZES_SRC := test_size.c
CFLAGS += -I$(BLIS_INC_PATH) -I$(TEST_SRC_PATH)
LINKER := $(CC)
LDFLAGS := -L/usr/lib/gcc/x86_64-redhat-linux/4.1.2 -L/usr/lib/gcc/x86_64-redhat-linux/4.1.2/../../../../lib64 -L/lib/../lib64 -L/usr/lib/../lib64 -lgfortranbegin -lgfortran -lm
LDFLAGS += -lpthread
#LDFLAGS := -L/usr/lib/gcc/i486-linux-gnu/4.4.3 -L/usr/lib/gcc/i486-linux-gnu/4.4.3/../../../../lib -L/lib/../lib -L/usr/lib/../lib -L/usr/lib/gcc/i486-linux-gnu/4.4.3/../../.. -L/usr/lib/i486-linux-gnu -lgfortranbegin -lgfortran -lm
LDFLAGS := -L/home/00146/field/gnu/gcc-4.8.2/lib64
LDFLAGS += -lgfortran -lm -lpthread
#
# --- Targets/rules ------------------------------------------------------------

View File

@@ -430,7 +430,7 @@ void PASTEMAC(ch,varname)( \
b, \
beta, \
c, rs_c, cs_c, \
a, b ); \
NULL ); \
}
INSERT_GENTFUNC_BASIC( gemm_ukr, GEMM_UKERNEL )

View File

@@ -485,15 +485,14 @@ void bli_gemmtrsm_ukr_make_subparts( dim_t k,
#define FUNCPTR_T gemmtrsm_ukr_fp
typedef void (*FUNCPTR_T)(
dim_t k,
void* alpha,
void* a1x,
void* a11,
void* bx1,
void* b11,
void* c11, inc_t rs_c, inc_t cs_c,
void* a_next,
void* b_next
dim_t k,
void* alpha,
void* a1x,
void* a11,
void* bx1,
void* b11,
void* c11, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
);
static FUNCPTR_T GENARRAY(ftypes_l,gemmtrsm_l_ukr);
@@ -515,7 +514,7 @@ void bli_gemmtrsm_ukr( obj_t* alpha,
void* buf_a11 = bli_obj_buffer_at_off( *a11 );
void* buf_bx1 = bli_obj_buffer_at_off( *bx1 );
void* buf_bx1 = bli_obj_buffer_at_off( *bx1 );
void* buf_b11 = bli_obj_buffer_at_off( *b11 );
@@ -527,6 +526,17 @@ void bli_gemmtrsm_ukr( obj_t* alpha,
FUNCPTR_T f;
auxinfo_t data;
// Fill the auxinfo_t struct in case the micro-kernel uses it.
if ( bli_obj_is_lower( *a11 ) ) { bli_auxinfo_set_next_a( buf_a1x, data ); }
else { bli_auxinfo_set_next_a( buf_a11, data ); }
bli_auxinfo_set_next_b( buf_bx1, data );
// STILL NEED TO FILL IN PANEL STRIDE FIELDS!
// Index into the type combination array to extract the correct
// function pointer.
if ( bli_obj_is_lower( *a11 ) ) f = ftypes_l[dt];
@@ -540,8 +550,7 @@ void bli_gemmtrsm_ukr( obj_t* alpha,
buf_bx1,
buf_b11,
buf_c11, rs_c, cs_c,
buf_a1x,
buf_bx1 );
&data );
}
@@ -549,15 +558,14 @@ void bli_gemmtrsm_ukr( obj_t* alpha,
#define GENTFUNC( ctype, ch, varname, ukrname ) \
\
void PASTEMAC(ch,varname)( \
dim_t k, \
void* alpha, \
void* a1x, \
void* a11, \
void* bx1, \
void* b11, \
void* c11, inc_t rs_c, inc_t cs_c, \
void* a_next, \
void* b_next \
dim_t k, \
void* alpha, \
void* a1x, \
void* a11, \
void* bx1, \
void* b11, \
void* c11, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
) \
{ \
PASTEMAC(ch,ukrname)( k, \
@@ -567,8 +575,7 @@ void PASTEMAC(ch,varname)( \
bx1, \
b11, \
c11, rs_c, cs_c, \
a_next, \
b_next ); \
data ); \
}
INSERT_GENTFUNC_BASIC( gemmtrsm_l_ukr, GEMMTRSM_L_UKERNEL )

View File

@@ -48,15 +48,14 @@ void bli_gemmtrsm_ukr( obj_t* alpha,
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname)( \
dim_t k, \
void* alpha, \
void* a1x, \
void* a11, \
void* bx1, \
void* b11, \
void* c11, inc_t rs_c, inc_t cs_c, \
void* a_next, \
void* b_next \
dim_t k, \
void* alpha, \
void* a1x, \
void* a11, \
void* bx1, \
void* b11, \
void* c11, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( gemmtrsm_l_ukr )

View File

@@ -377,9 +377,10 @@ void libblis_test_trsm_ukr_check( side_t side,
#define FUNCPTR_T trsm_ukr_fp
typedef void (*FUNCPTR_T)(
void* a,
void* b,
void* c, inc_t rs_c, inc_t cs_c
void* a,
void* b,
void* c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
);
static FUNCPTR_T GENARRAY(ftypes_l,trsm_l_ukr);
@@ -402,6 +403,16 @@ void bli_trsm_ukr( obj_t* a,
FUNCPTR_T f;
auxinfo_t data;
// Fill the auxinfo_t struct in case the micro-kernel uses it.
bli_auxinfo_set_next_a( buf_a, data );
bli_auxinfo_set_next_b( buf_b, data );
// STILL NEED TO FILL IN PANEL STRIDE FIELDS!
// Index into the type combination array to extract the correct
// function pointer.
if ( bli_obj_is_lower( *a ) ) f = ftypes_l[dt];
@@ -410,7 +421,8 @@ void bli_trsm_ukr( obj_t* a,
// Invoke the function.
f( buf_a,
buf_b,
buf_c, rs_c, cs_c );
buf_c, rs_c, cs_c,
&data );
}
@@ -418,14 +430,16 @@ void bli_trsm_ukr( obj_t* a,
#define GENTFUNC( ctype, ch, varname, ukrname ) \
\
void PASTEMAC(ch,varname)( \
void* a, \
void* b, \
void* c, inc_t rs_c, inc_t cs_c \
void* a, \
void* b, \
void* c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
) \
{ \
PASTEMAC(ch,ukrname)( a, \
b, \
c, rs_c, cs_c ); \
c, rs_c, cs_c, \
data ); \
}
INSERT_GENTFUNC_BASIC( trsm_l_ukr, TRSM_L_UKERNEL )

View File

@@ -45,9 +45,10 @@ void bli_trsm_ukr( obj_t* a,
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname)( \
void* a, \
void* b, \
void* c, inc_t rs_c, inc_t cs_c \
void* a, \
void* b, \
void* c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( trsm_l_ukr )