mirror of
https://github.com/amd/blis.git
synced 2026-06-29 02:37:05 +00:00
Introduced auxinfo_t argument to micro-kernels.
Details: - Removed the a_next and b_next arguments from micro-kernels and replaced them with a pointer to a new datatype, auxinfo_t, which is simply a struct that holds a_next and b_next. The struct may also hold other auxiliary information that may be useful to a micro-kernel, such as the micro-panel stride. Micro-kernels may access the struct's fields via accessor macros defined in bli_auxinfo_macro_defs.h. - Updated all micro-kernel definitions and micro-kernel calls, as well as the macro-kernels (which now declare and initialize the structs), according to the above change.
This commit is contained in:
@@ -169,6 +169,7 @@ void PASTEMAC(ch,varname)( \
|
||||
inc_t rstep_a; \
|
||||
inc_t cstep_b; \
|
||||
inc_t rstep_c, cstep_c; \
|
||||
auxinfo_t aux; \
|
||||
\
|
||||
/*
|
||||
Assumptions/assertions:
|
||||
@@ -207,6 +208,10 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
rstep_c = rs_c * MR; \
|
||||
cstep_c = cs_c * NR; \
|
||||
\
|
||||
/* Save the panel strides of A and B to the auxinfo_t object. */ \
|
||||
bli_auxinfo_set_ps_a( ps_a, aux ); \
|
||||
bli_auxinfo_set_ps_b( ps_b, aux ); \
|
||||
\
|
||||
b1 = b_cast; \
|
||||
c1 = c_cast; \
|
||||
@@ -242,6 +247,11 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. */ \
|
||||
bli_auxinfo_set_next_a( a2, aux ); \
|
||||
bli_auxinfo_set_next_b( b2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -253,7 +263,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -264,7 +274,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
zero, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Scale the bottom edge of C and add the result from above. */ \
|
||||
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
|
||||
|
||||
@@ -179,6 +179,7 @@ void PASTEMAC(ch,varname)( \
|
||||
inc_t rstep_a; \
|
||||
inc_t cstep_b; \
|
||||
inc_t rstep_c, cstep_c; \
|
||||
auxinfo_t aux; \
|
||||
\
|
||||
/*
|
||||
Assumptions/assertions:
|
||||
@@ -217,6 +218,10 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
rstep_c = rs_c * MR; \
|
||||
cstep_c = cs_c * NR; \
|
||||
\
|
||||
/* Save the panel strides of A and B to the auxinfo_t object. */ \
|
||||
bli_auxinfo_set_ps_a( ps_a, aux ); \
|
||||
bli_auxinfo_set_ps_b( ps_b, aux ); \
|
||||
\
|
||||
b1 = b_cast; \
|
||||
c1 = c_cast; \
|
||||
@@ -224,6 +229,9 @@ void PASTEMAC(ch,varname)( \
|
||||
/* Since we pack micro-panels of B incrementally, one at a time, the
|
||||
address of the next micro-panel of B remains constant. */ \
|
||||
b2 = bp; \
|
||||
\
|
||||
/* Save address of next panel of B to the auxinfo_t object. */ \
|
||||
bli_auxinfo_set_next_b( b2, aux ); \
|
||||
\
|
||||
/* Loop over the n dimension (NR columns at a time). */ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
@@ -257,6 +265,9 @@ void PASTEMAC(ch,varname)( \
|
||||
{ \
|
||||
a2 = a_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save address of next panel of A to the auxinfo_t object. */ \
|
||||
bli_auxinfo_set_next_a( a2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -268,7 +279,7 @@ void PASTEMAC(ch,varname)( \
|
||||
bp, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -279,7 +290,7 @@ void PASTEMAC(ch,varname)( \
|
||||
bp, \
|
||||
zero, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Scale the bottom edge of C and add the result from above. */ \
|
||||
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
|
||||
|
||||
@@ -45,8 +45,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
) \
|
||||
{ \
|
||||
const dim_t m = PASTEMAC(ch,mr); \
|
||||
|
||||
@@ -46,8 +46,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( gemm_ref_mxn )
|
||||
|
||||
@@ -175,6 +175,7 @@ void PASTEMAC(ch,varname)( \
|
||||
inc_t rstep_a; \
|
||||
inc_t cstep_b; \
|
||||
inc_t rstep_c, cstep_c; \
|
||||
auxinfo_t aux; \
|
||||
\
|
||||
/*
|
||||
Assumptions/assertions:
|
||||
@@ -238,6 +239,10 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
rstep_c = rs_c * MR; \
|
||||
cstep_c = cs_c * NR; \
|
||||
\
|
||||
/* Save the panel strides of A and B to the auxinfo_t object. */ \
|
||||
bli_auxinfo_set_ps_a( ps_a, aux ); \
|
||||
bli_auxinfo_set_ps_b( ps_b, aux ); \
|
||||
\
|
||||
b1 = b_cast; \
|
||||
c1 = c_cast; \
|
||||
@@ -276,6 +281,11 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. */ \
|
||||
bli_auxinfo_set_next_a( a2, aux ); \
|
||||
bli_auxinfo_set_next_b( b2, aux ); \
|
||||
\
|
||||
/* If the diagonal intersects the current MR x NR submatrix, we
|
||||
compute it in the temporary buffer and then add in the elements
|
||||
@@ -293,7 +303,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
zero, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Scale C and add the result to only the stored part. */ \
|
||||
PASTEMAC(ch,xpbys_mxn_l)( diagoffc_ij, \
|
||||
@@ -314,7 +324,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -325,7 +335,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
zero, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Scale the edge of C and add the result. */ \
|
||||
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
|
||||
|
||||
@@ -175,6 +175,7 @@ void PASTEMAC(ch,varname)( \
|
||||
inc_t rstep_a; \
|
||||
inc_t cstep_b; \
|
||||
inc_t rstep_c, cstep_c; \
|
||||
auxinfo_t aux; \
|
||||
\
|
||||
/*
|
||||
Assumptions/assertions:
|
||||
@@ -238,6 +239,10 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
rstep_c = rs_c * MR; \
|
||||
cstep_c = cs_c * NR; \
|
||||
\
|
||||
/* Save the panel strides of A and B to the auxinfo_t object. */ \
|
||||
bli_auxinfo_set_ps_a( ps_a, aux ); \
|
||||
bli_auxinfo_set_ps_b( ps_b, aux ); \
|
||||
\
|
||||
b1 = b_cast; \
|
||||
c1 = c_cast; \
|
||||
@@ -276,6 +281,11 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. */ \
|
||||
bli_auxinfo_set_next_a( a2, aux ); \
|
||||
bli_auxinfo_set_next_b( b2, aux ); \
|
||||
\
|
||||
/* If the diagonal intersects the current MR x NR submatrix, we
|
||||
compute it in the temporary buffer and then add in the elements
|
||||
@@ -293,7 +303,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
zero, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Scale C and add the result to only the stored part. */ \
|
||||
PASTEMAC(ch,xpbys_mxn_u)( diagoffc_ij, \
|
||||
@@ -314,7 +324,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -325,7 +335,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
zero, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Scale the edge of C and add the result. */ \
|
||||
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
|
||||
|
||||
@@ -162,6 +162,7 @@ void PASTEMAC(ch,varname)( \
|
||||
inc_t rstep_a; \
|
||||
inc_t cstep_b; \
|
||||
inc_t rstep_c, cstep_c; \
|
||||
auxinfo_t aux; \
|
||||
\
|
||||
/*
|
||||
Assumptions/assertions:
|
||||
@@ -224,6 +225,10 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
rstep_c = rs_c * MR; \
|
||||
cstep_c = cs_c * NR; \
|
||||
\
|
||||
/* Save the panel strides of A and B to the auxinfo_t object. */ \
|
||||
bli_auxinfo_set_ps_a( ps_a, aux ); \
|
||||
bli_auxinfo_set_ps_b( ps_b, aux ); \
|
||||
\
|
||||
b1 = b_cast; \
|
||||
c1 = c_cast; \
|
||||
@@ -276,6 +281,11 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. */ \
|
||||
bli_auxinfo_set_next_a( a2, aux ); \
|
||||
bli_auxinfo_set_next_b( b2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -287,7 +297,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1_i, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -303,7 +313,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1_i, \
|
||||
beta_cast, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Copy the result to the edge of C. */ \
|
||||
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
|
||||
@@ -326,6 +336,11 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. */ \
|
||||
bli_auxinfo_set_next_a( a2, aux ); \
|
||||
bli_auxinfo_set_next_b( b2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -337,7 +352,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
one, \
|
||||
c11, rs_c, cs_c, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -348,7 +363,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
zero, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Add the result to the edge of C. */ \
|
||||
PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \
|
||||
|
||||
@@ -162,6 +162,7 @@ void PASTEMAC(ch,varname)( \
|
||||
inc_t rstep_a; \
|
||||
inc_t cstep_b; \
|
||||
inc_t rstep_c, cstep_c; \
|
||||
auxinfo_t aux; \
|
||||
\
|
||||
/*
|
||||
Assumptions/assertions:
|
||||
@@ -232,6 +233,10 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
rstep_c = rs_c * MR; \
|
||||
cstep_c = cs_c * NR; \
|
||||
\
|
||||
/* Save the panel strides of A and B to the auxinfo_t object. */ \
|
||||
bli_auxinfo_set_ps_a( ps_a, aux ); \
|
||||
bli_auxinfo_set_ps_b( ps_b, aux ); \
|
||||
\
|
||||
b1 = b_cast; \
|
||||
c1 = c_cast; \
|
||||
@@ -284,6 +289,11 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. */ \
|
||||
bli_auxinfo_set_next_a( a2, aux ); \
|
||||
bli_auxinfo_set_next_b( b2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -295,7 +305,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1_i, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -311,7 +321,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1_i, \
|
||||
beta_cast, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Copy the result to the edge of C. */ \
|
||||
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
|
||||
@@ -334,6 +344,11 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. */ \
|
||||
bli_auxinfo_set_next_a( a2, aux ); \
|
||||
bli_auxinfo_set_next_b( b2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -345,7 +360,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
one, \
|
||||
c11, rs_c, cs_c, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -356,7 +371,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
zero, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Add the result to the edge of C. */ \
|
||||
PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \
|
||||
|
||||
@@ -163,6 +163,7 @@ void PASTEMAC(ch,varname)( \
|
||||
inc_t rstep_a; \
|
||||
inc_t cstep_b; \
|
||||
inc_t rstep_c, cstep_c; \
|
||||
auxinfo_t aux; \
|
||||
\
|
||||
/*
|
||||
Assumptions/assertions:
|
||||
@@ -233,6 +234,10 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
rstep_c = rs_c * MR; \
|
||||
cstep_c = cs_c * NR; \
|
||||
\
|
||||
/* Save the panel strides of A and B to the auxinfo_t object. */ \
|
||||
bli_auxinfo_set_ps_a( ps_a, aux ); \
|
||||
bli_auxinfo_set_ps_b( ps_b, aux ); \
|
||||
\
|
||||
b1 = b_cast; \
|
||||
c1 = c_cast; \
|
||||
@@ -284,6 +289,11 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. */ \
|
||||
bli_auxinfo_set_next_a( a2, aux ); \
|
||||
bli_auxinfo_set_next_b( b2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -295,7 +305,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -311,7 +321,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
beta_cast, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Copy the result to the edge of C. */ \
|
||||
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
|
||||
@@ -341,6 +351,11 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. */ \
|
||||
bli_auxinfo_set_next_a( a2, aux ); \
|
||||
bli_auxinfo_set_next_b( b2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -352,7 +367,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
one, \
|
||||
c11, rs_c, cs_c, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -363,7 +378,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
zero, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Add the result to the edge of C. */ \
|
||||
PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \
|
||||
|
||||
@@ -163,6 +163,7 @@ void PASTEMAC(ch,varname)( \
|
||||
inc_t rstep_a; \
|
||||
inc_t cstep_b; \
|
||||
inc_t rstep_c, cstep_c; \
|
||||
auxinfo_t aux; \
|
||||
\
|
||||
/*
|
||||
Assumptions/assertions:
|
||||
@@ -234,6 +235,10 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
rstep_c = rs_c * MR; \
|
||||
cstep_c = cs_c * NR; \
|
||||
\
|
||||
/* Save the panel strides of A and B to the auxinfo_t object. */ \
|
||||
bli_auxinfo_set_ps_a( ps_a, aux ); \
|
||||
bli_auxinfo_set_ps_b( ps_b, aux ); \
|
||||
\
|
||||
b1 = b_cast; \
|
||||
c1 = c_cast; \
|
||||
@@ -284,6 +289,11 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. */ \
|
||||
bli_auxinfo_set_next_a( a2, aux ); \
|
||||
bli_auxinfo_set_next_b( b2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -295,7 +305,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
beta_cast, \
|
||||
c11, rs_c, cs_c, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -311,7 +321,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
beta_cast, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Copy the result to the edge of C. */ \
|
||||
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
|
||||
@@ -341,6 +351,11 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. */ \
|
||||
bli_auxinfo_set_next_a( a2, aux ); \
|
||||
bli_auxinfo_set_next_b( b2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -352,7 +367,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
one, \
|
||||
c11, rs_c, cs_c, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -363,7 +378,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
zero, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Add the result to the edge of C. */ \
|
||||
PASTEMAC(ch,adds_mxn)( m_cur, n_cur, \
|
||||
|
||||
@@ -151,6 +151,7 @@ void PASTEMAC(ch,varname)( \
|
||||
dim_t rstep_a; \
|
||||
dim_t cstep_b; \
|
||||
dim_t rstep_c, cstep_c; \
|
||||
auxinfo_t aux; \
|
||||
\
|
||||
/*
|
||||
Assumptions/assertions:
|
||||
@@ -223,6 +224,10 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
rstep_c = rs_c * MR; \
|
||||
cstep_c = cs_c * NR; \
|
||||
\
|
||||
/* Save the panel strides of A and B to the auxinfo_t object. */ \
|
||||
bli_auxinfo_set_ps_a( ps_a, aux ); \
|
||||
bli_auxinfo_set_ps_b( ps_b, aux ); \
|
||||
\
|
||||
b1 = b_cast; \
|
||||
c1 = c_cast; \
|
||||
@@ -288,6 +293,11 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. */ \
|
||||
bli_auxinfo_set_next_a( a2, aux ); \
|
||||
bli_auxinfo_set_next_b( b2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -300,7 +310,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b01, \
|
||||
b11, \
|
||||
c11, rs_c, cs_c, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -312,7 +322,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b01, \
|
||||
b11, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Copy the result to the bottom edge of C. */ \
|
||||
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
|
||||
@@ -335,6 +345,11 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. */ \
|
||||
bli_auxinfo_set_next_a( a2, aux ); \
|
||||
bli_auxinfo_set_next_b( b2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -346,7 +361,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
alpha_cast, \
|
||||
c11, rs_c, cs_c, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -357,7 +372,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
zero, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Add the result to the edge of C. */ \
|
||||
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
|
||||
|
||||
@@ -152,6 +152,7 @@ void PASTEMAC(ch,varname)( \
|
||||
dim_t rstep_a; \
|
||||
dim_t cstep_b; \
|
||||
dim_t rstep_c, cstep_c; \
|
||||
auxinfo_t aux; \
|
||||
\
|
||||
/*
|
||||
Assumptions/assertions:
|
||||
@@ -232,6 +233,10 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
rstep_c = rs_c * MR; \
|
||||
cstep_c = cs_c * NR; \
|
||||
\
|
||||
/* Save the panel strides of A and B to the auxinfo_t object. */ \
|
||||
bli_auxinfo_set_ps_a( ps_a, aux ); \
|
||||
bli_auxinfo_set_ps_b( ps_b, aux ); \
|
||||
\
|
||||
b1 = b_cast; \
|
||||
c1 = c_cast; \
|
||||
@@ -299,6 +304,11 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. */ \
|
||||
bli_auxinfo_set_next_a( a2, aux ); \
|
||||
bli_auxinfo_set_next_b( b2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -311,7 +321,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b21, \
|
||||
b11, \
|
||||
c11, rs_c, cs_c, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -323,7 +333,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b21, \
|
||||
b11, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Copy the result to the bottom edge of C. */ \
|
||||
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
|
||||
@@ -346,6 +356,11 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. */ \
|
||||
bli_auxinfo_set_next_a( a2, aux ); \
|
||||
bli_auxinfo_set_next_b( b2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -357,7 +372,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
alpha_cast, \
|
||||
c11, rs_c, cs_c, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -368,7 +383,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b1, \
|
||||
zero, \
|
||||
ct, rs_ct, cs_ct, \
|
||||
a2, b2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Add the result to the edge of C. */ \
|
||||
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
|
||||
|
||||
@@ -152,6 +152,7 @@ void PASTEMAC(ch,varname)( \
|
||||
dim_t rstep_a; \
|
||||
dim_t cstep_b; \
|
||||
dim_t rstep_c, cstep_c; \
|
||||
auxinfo_t aux; \
|
||||
\
|
||||
/*
|
||||
Assumptions/assertions:
|
||||
@@ -237,6 +238,12 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
rstep_c = rs_c * MR; \
|
||||
cstep_c = cs_c * NR; \
|
||||
\
|
||||
/* Save the panel strides of A and B to the auxinfo_t object.
|
||||
NOTE: We swap the values for A and B since the triangular
|
||||
"A" matrix is actually contained within B. */ \
|
||||
bli_auxinfo_set_ps_a( ps_b, aux ); \
|
||||
bli_auxinfo_set_ps_b( ps_a, aux ); \
|
||||
\
|
||||
b1 = b_cast; \
|
||||
c1 = c_cast; \
|
||||
@@ -302,6 +309,12 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( jb == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. NOTE: We swap the values for A and B since the
|
||||
triangular "A" matrix is actually contained within B. */ \
|
||||
bli_auxinfo_set_next_a( b2, aux ); \
|
||||
bli_auxinfo_set_next_b( a2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -314,7 +327,7 @@ void PASTEMAC(ch,varname)( \
|
||||
a12, \
|
||||
a11, \
|
||||
c11, cs_c, rs_c, \
|
||||
b2, a2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -326,7 +339,7 @@ void PASTEMAC(ch,varname)( \
|
||||
a12, \
|
||||
a11, \
|
||||
ct, cs_ct, rs_ct, \
|
||||
b2, a2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Copy the result to the bottom edge of C. */ \
|
||||
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
|
||||
@@ -356,6 +369,12 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( jb == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. NOTE: We swap the values for A and B since the
|
||||
triangular "A" matrix is actually contained within B. */ \
|
||||
bli_auxinfo_set_next_a( b2, aux ); \
|
||||
bli_auxinfo_set_next_b( a2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -367,7 +386,7 @@ void PASTEMAC(ch,varname)( \
|
||||
a1, \
|
||||
alpha_cast, \
|
||||
c11, cs_c, rs_c, \
|
||||
b2, a2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -378,7 +397,7 @@ void PASTEMAC(ch,varname)( \
|
||||
a1, \
|
||||
zero, \
|
||||
ct, cs_ct, rs_ct, \
|
||||
b2, a2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Add the result to the edge of C. */ \
|
||||
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
|
||||
|
||||
@@ -151,6 +151,7 @@ void PASTEMAC(ch,varname)( \
|
||||
dim_t rstep_a; \
|
||||
dim_t cstep_b; \
|
||||
dim_t rstep_c, cstep_c; \
|
||||
auxinfo_t aux; \
|
||||
\
|
||||
/*
|
||||
Assumptions/assertions:
|
||||
@@ -232,6 +233,12 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
rstep_c = rs_c * MR; \
|
||||
cstep_c = cs_c * NR; \
|
||||
\
|
||||
/* Save the panel strides of A and B to the auxinfo_t object.
|
||||
NOTE: We swap the values for A and B since the triangular
|
||||
"A" matrix is actually contained within B. */ \
|
||||
bli_auxinfo_set_ps_a( ps_b, aux ); \
|
||||
bli_auxinfo_set_ps_b( ps_a, aux ); \
|
||||
\
|
||||
b1 = b_cast; \
|
||||
c1 = c_cast; \
|
||||
@@ -296,6 +303,12 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. NOTE: We swap the values for A and B since the
|
||||
triangular "A" matrix is actually contained within B. */ \
|
||||
bli_auxinfo_set_next_a( b2, aux ); \
|
||||
bli_auxinfo_set_next_b( a2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -308,7 +321,7 @@ void PASTEMAC(ch,varname)( \
|
||||
a10, \
|
||||
a11, \
|
||||
c11, cs_c, rs_c, \
|
||||
b2, a2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -320,7 +333,7 @@ void PASTEMAC(ch,varname)( \
|
||||
a10, \
|
||||
a11, \
|
||||
ct, cs_ct, rs_ct, \
|
||||
b2, a2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Copy the result to the bottom edge of C. */ \
|
||||
PASTEMAC(ch,copys_mxn)( m_cur, n_cur, \
|
||||
@@ -350,6 +363,12 @@ void PASTEMAC(ch,varname)( \
|
||||
if ( j == n_iter - 1 ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
\
|
||||
/* Save addresses of next panels of A and B to the auxinfo_t
|
||||
object. NOTE: We swap the values for A and B since the
|
||||
triangular "A" matrix is actually contained within B. */ \
|
||||
bli_auxinfo_set_next_a( b2, aux ); \
|
||||
bli_auxinfo_set_next_b( a2, aux ); \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
@@ -361,7 +380,7 @@ void PASTEMAC(ch,varname)( \
|
||||
a1, \
|
||||
alpha_cast, \
|
||||
c11, cs_c, rs_c, \
|
||||
b2, a2 ); \
|
||||
&aux ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
@@ -372,7 +391,7 @@ void PASTEMAC(ch,varname)( \
|
||||
a1, \
|
||||
zero, \
|
||||
ct, cs_ct, rs_ct, \
|
||||
b2, a2 ); \
|
||||
&aux ); \
|
||||
\
|
||||
/* Add the result to the edge of C. */ \
|
||||
PASTEMAC(ch,xpbys_mxn)( m_cur, n_cur, \
|
||||
|
||||
@@ -46,8 +46,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict b01, \
|
||||
ctype* restrict b11, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
) \
|
||||
{ \
|
||||
const inc_t rs_b = PASTEMAC(ch,packnr); \
|
||||
@@ -62,14 +61,14 @@ void PASTEMAC(ch,varname)( \
|
||||
b01, \
|
||||
alpha, \
|
||||
b11, rs_b, cs_b, \
|
||||
a_next, \
|
||||
b_next ); \
|
||||
data ); \
|
||||
\
|
||||
/* b11 = inv(a11) * b11;
|
||||
c11 = b11; */ \
|
||||
PASTEMAC(ch,trsmukr)( a11, \
|
||||
b11, \
|
||||
c11, rs_c, cs_c ); \
|
||||
c11, rs_c, cs_c, \
|
||||
data ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC2( gemmtrsm_l_ref_mxn, GEMM_UKERNEL, TRSM_L_UKERNEL )
|
||||
|
||||
@@ -47,8 +47,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict b01, \
|
||||
ctype* restrict b11, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( gemmtrsm_l_ref_mxn )
|
||||
|
||||
@@ -46,8 +46,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict b21, \
|
||||
ctype* restrict b11, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
) \
|
||||
{ \
|
||||
const inc_t rs_b = PASTEMAC(ch,packnr); \
|
||||
@@ -62,14 +61,14 @@ void PASTEMAC(ch,varname)( \
|
||||
b21, \
|
||||
alpha, \
|
||||
b11, rs_b, cs_b, \
|
||||
a_next, \
|
||||
b_next ); \
|
||||
data ); \
|
||||
\
|
||||
/* b11 = inv(a11) * b11;
|
||||
c11 = b11; */ \
|
||||
PASTEMAC(ch,trsmukr)( a11, \
|
||||
b11, \
|
||||
c11, rs_c, cs_c ); \
|
||||
c11, rs_c, cs_c, \
|
||||
data ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC2( gemmtrsm_u_ref_mxn, GEMM_UKERNEL, TRSM_U_UKERNEL )
|
||||
|
||||
@@ -47,8 +47,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict b21, \
|
||||
ctype* restrict b11, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( gemmtrsm_u_ref_mxn )
|
||||
|
||||
@@ -41,7 +41,8 @@
|
||||
void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict a, \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
) \
|
||||
{ \
|
||||
const dim_t m = PASTEMAC(ch,mr); \
|
||||
|
||||
@@ -42,7 +42,8 @@
|
||||
void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict a, \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( trsm_l_ref_mxn )
|
||||
|
||||
@@ -41,7 +41,8 @@
|
||||
void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict a, \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
) \
|
||||
{ \
|
||||
const dim_t m = PASTEMAC(ch,mr); \
|
||||
|
||||
@@ -42,7 +42,8 @@
|
||||
void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict a, \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( trsm_u_ref_mxn )
|
||||
|
||||
58
frame/include/bli_auxinfo_macro_defs.h
Normal file
58
frame/include/bli_auxinfo_macro_defs.h
Normal file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2013, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_AUXINFO_MACRO_DEFS_H
|
||||
#define BLIS_AUXINFO_MACRO_DEFS_H
|
||||
|
||||
|
||||
// auxinfo_t field query
|
||||
|
||||
// Yield the a_next field (address of the next micro-panel of A) of the
// auxinfo_t that `aux_ptr` points to. The argument must be a pointer.
#define bli_auxinfo_next_a( aux_ptr ) ( (aux_ptr)->a_next )
|
||||
// Yield the b_next field (address of the next micro-panel of B) of the
// auxinfo_t that `aux_ptr` points to. The argument must be a pointer.
#define bli_auxinfo_next_b( aux_ptr ) ( (aux_ptr)->b_next )
|
||||
|
||||
// Yield the ps_a field (panel stride of A) of the auxinfo_t that `aux_ptr`
// points to. The argument must be a pointer.
#define bli_auxinfo_ps_a( aux_ptr ) ( (aux_ptr)->ps_a )
|
||||
// Yield the ps_b field (panel stride of B) of the auxinfo_t that `aux_ptr`
// points to. The argument must be a pointer.
#define bli_auxinfo_ps_b( aux_ptr ) ( (aux_ptr)->ps_b )
|
||||
|
||||
|
||||
// auxinfo_t field modification
|
||||
|
||||
// Store `next_ptr` in the a_next field. Unlike the query macros, `aux_obj`
// is the auxinfo_t object itself (accessed with `.`), not a pointer to it.
#define bli_auxinfo_set_next_a( next_ptr, aux_obj ) { (aux_obj).a_next = (next_ptr); }
|
||||
// Store `next_ptr` in the b_next field. Unlike the query macros, `aux_obj`
// is the auxinfo_t object itself (accessed with `.`), not a pointer to it.
#define bli_auxinfo_set_next_b( next_ptr, aux_obj ) { (aux_obj).b_next = (next_ptr); }
|
||||
|
||||
// Store the stride value `stride` in the ps_a field. Unlike the query
// macros, `aux_obj` is the auxinfo_t object itself, not a pointer to it.
#define bli_auxinfo_set_ps_a( stride, aux_obj ) { (aux_obj).ps_a = (stride); }
|
||||
// Store the stride value `stride` in the ps_b field. Unlike the query
// macros, `aux_obj` is the auxinfo_t object itself, not a pointer to it.
#define bli_auxinfo_set_ps_b( stride, aux_obj ) { (aux_obj).ps_b = (stride); }
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -91,6 +91,7 @@
|
||||
#include "bli_scalar_macro_defs.h"
|
||||
#include "bli_error_macro_defs.h"
|
||||
#include "bli_blas_macro_defs.h"
|
||||
#include "bli_auxinfo_macro_defs.h"
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -63,32 +63,32 @@
|
||||
|
||||
#define bli_pool_set_block_ptrs( block_ptrs0, pool_p ) \
|
||||
{ \
|
||||
pool_p->block_ptrs = block_ptrs0; \
|
||||
(pool_p)->block_ptrs = block_ptrs0; \
|
||||
}
|
||||
|
||||
#define bli_pool_set_num_blocks( num_blocks0, pool_p ) \
|
||||
{ \
|
||||
pool_p->num_blocks = num_blocks0; \
|
||||
(pool_p)->num_blocks = num_blocks0; \
|
||||
}
|
||||
|
||||
#define bli_pool_set_block_size( block_size0, pool_p ) \
|
||||
{ \
|
||||
pool_p->block_size = block_size0; \
|
||||
(pool_p)->block_size = block_size0; \
|
||||
}
|
||||
|
||||
#define bli_pool_set_top_index( top_index0, pool_p ) \
|
||||
{ \
|
||||
pool_p->top_index = top_index0; \
|
||||
(pool_p)->top_index = top_index0; \
|
||||
}
|
||||
|
||||
#define bli_pool_dec_top_index( pool_p ) \
|
||||
{ \
|
||||
(pool_p->top_index)--; \
|
||||
((pool_p)->top_index)--; \
|
||||
}
|
||||
|
||||
#define bli_pool_inc_top_index( pool_p ) \
|
||||
{ \
|
||||
(pool_p->top_index)++; \
|
||||
((pool_p)->top_index)++; \
|
||||
}
|
||||
|
||||
#define bli_pool_init( num_blocks, block_size, block_ptrs, pool_p ) \
|
||||
|
||||
@@ -167,6 +167,26 @@ typedef double f77_double;
|
||||
typedef scomplex f77_scomplex;
|
||||
typedef dcomplex f77_dcomplex;
|
||||
|
||||
// -- Auxiliary kernel info type --
|
||||
|
||||
// Note: This struct is used by macro-kernels to package together extra
|
||||
// parameter values that may be of use to the micro-kernel without
|
||||
// cluttering up the micro-kernel interface itself.
|
||||
|
||||
// Fields are written with the bli_auxinfo_set_*() macros, which take the
// struct object itself, and read with the bli_auxinfo_*() query macros,
// which take a pointer to it.
typedef struct
|
||||
{
|
||||
// Pointers to the micro-panels of A and B which will be used by the
|
||||
// next call to the micro-kernel.
// Both are typeless (void*); they are stored and returned as-is.
|
||||
void* a_next;
|
||||
void* b_next;
|
||||
|
||||
// The panel strides of A and B.
// NOTE(review): units (elements vs. bytes) are not stated here; confirm
// against the code that computes ps_a/ps_b before relying on them.
|
||||
inc_t ps_a;
|
||||
inc_t ps_b;
|
||||
|
||||
} auxinfo_t;
|
||||
|
||||
|
||||
|
||||
//
|
||||
// -- BLIS info bit field masks ------------------------------------------------
|
||||
|
||||
@@ -35,16 +35,18 @@
|
||||
#include "blis.h"
|
||||
|
||||
void bli_sgemm_opt_4x4(
|
||||
dim_t k,
|
||||
float* restrict alpha,
|
||||
float* restrict a,
|
||||
float* restrict b,
|
||||
float* restrict beta,
|
||||
float* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
float* restrict a_next,
|
||||
float* restrict b_next
|
||||
dim_t k,
|
||||
float* restrict alpha,
|
||||
float* restrict a,
|
||||
float* restrict b,
|
||||
float* restrict beta,
|
||||
float* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
void* a_next = bli_auxinfo_next_a( data );
|
||||
void* b_next = bli_auxinfo_next_b( data );
|
||||
|
||||
float32x4_t alphav;
|
||||
alphav = vmovq_n_f32( *alpha );
|
||||
|
||||
@@ -262,16 +264,18 @@ void bli_sgemm_opt_4x4(
|
||||
}
|
||||
|
||||
void bli_dgemm_opt_4x4(
|
||||
dim_t k,
|
||||
double* restrict alpha,
|
||||
double* restrict a,
|
||||
double* restrict b,
|
||||
double* restrict beta,
|
||||
double* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
double* restrict a_next,
|
||||
double* restrict b_next
|
||||
dim_t k,
|
||||
double* restrict alpha,
|
||||
double* restrict a,
|
||||
double* restrict b,
|
||||
double* restrict beta,
|
||||
double* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
//void* a_next = bli_auxinfo_next_a( data );
|
||||
//void* b_next = bli_auxinfo_next_b( data );
|
||||
|
||||
//dim_t k_iter;
|
||||
dim_t k_left;
|
||||
|
||||
@@ -515,8 +519,7 @@ void bli_cgemm_opt_4x4(
|
||||
scomplex* restrict b,
|
||||
scomplex* restrict beta,
|
||||
scomplex* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
scomplex* restrict a_next,
|
||||
scomplex* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -526,8 +529,7 @@ void bli_cgemm_opt_4x4(
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_zgemm_opt_4x4(
|
||||
@@ -537,8 +539,7 @@ void bli_zgemm_opt_4x4(
|
||||
dcomplex* restrict b,
|
||||
dcomplex* restrict beta,
|
||||
dcomplex* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
dcomplex* restrict a_next,
|
||||
dcomplex* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -548,7 +549,6 @@ void bli_zgemm_opt_4x4(
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
|
||||
@@ -45,8 +45,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( gemm_opt_4x4 )
|
||||
|
||||
@@ -36,14 +36,14 @@
|
||||
#undef restrict
|
||||
|
||||
void bli_sgemm_8x8(
|
||||
dim_t k,
|
||||
float* alpha,
|
||||
float* a,
|
||||
float* b,
|
||||
float* beta,
|
||||
float* c, inc_t rs_c, inc_t cs_c,
|
||||
float* a_next, float* b_next
|
||||
)
|
||||
dim_t k,
|
||||
float* alpha,
|
||||
float* a,
|
||||
float* b,
|
||||
float* beta,
|
||||
float* c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
bli_sgemm_ref_mxn( k,
|
||||
@@ -52,8 +52,7 @@ void bli_sgemm_8x8(
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
|
||||
@@ -76,14 +75,14 @@ void bli_sgemm_8x8(
|
||||
*/
|
||||
|
||||
void bli_dgemm_8x8(
|
||||
dim_t k,
|
||||
restrict double* alpha,
|
||||
restrict double* a,
|
||||
restrict double* b,
|
||||
restrict double* beta,
|
||||
restrict double* c, inc_t rs_c, inc_t cs_c,
|
||||
restrict double* a_next, restrict double* b_next
|
||||
)
|
||||
dim_t k,
|
||||
double* alpha,
|
||||
double* a,
|
||||
double* b,
|
||||
double* beta,
|
||||
double* c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
|
||||
{
|
||||
//Registers for storing C.
|
||||
@@ -224,33 +223,34 @@ void bli_dgemm_8x8(
|
||||
}
|
||||
|
||||
void bli_dgemm_8x8_mt(
|
||||
dim_t k,
|
||||
restrict double* alpha,
|
||||
restrict double* a,
|
||||
restrict double* b,
|
||||
restrict double* beta,
|
||||
restrict double* c, inc_t rs_c, inc_t cs_c,
|
||||
restrict double* a_next, restrict double* b_next,
|
||||
dim_t tid
|
||||
)
|
||||
dim_t k,
|
||||
double* alpha,
|
||||
double* a,
|
||||
double* b,
|
||||
double* beta,
|
||||
double* c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data,
|
||||
dim_t tid
|
||||
)
|
||||
{
|
||||
|
||||
bli_dgemm_8x8( k, alpha,
|
||||
a,
|
||||
b, beta,
|
||||
c,
|
||||
rs_c, cs_c, NULL, NULL );
|
||||
bli_dgemm_8x8( k,
|
||||
alpha,
|
||||
a,
|
||||
b, beta,
|
||||
c,
|
||||
rs_c, cs_c,
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_cgemm_8x8(
|
||||
dim_t k,
|
||||
scomplex* alpha,
|
||||
scomplex* a,
|
||||
scomplex* b,
|
||||
scomplex* beta,
|
||||
scomplex* c, inc_t rs_c, inc_t cs_c,
|
||||
scomplex* a_next, scomplex* b_next
|
||||
)
|
||||
dim_t k,
|
||||
scomplex* alpha,
|
||||
scomplex* a,
|
||||
scomplex* b,
|
||||
scomplex* beta,
|
||||
scomplex* c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
bli_cgemm_ref_mxn( k,
|
||||
@@ -259,19 +259,18 @@ void bli_cgemm_8x8(
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_zgemm_8x8(
|
||||
dim_t k,
|
||||
dcomplex* alpha,
|
||||
dcomplex* a,
|
||||
dcomplex* b,
|
||||
dcomplex* beta,
|
||||
dcomplex* c, inc_t rs_c, inc_t cs_c,
|
||||
dcomplex* a_next, dcomplex* b_next
|
||||
)
|
||||
dim_t k,
|
||||
dcomplex* alpha,
|
||||
dcomplex* a,
|
||||
dcomplex* b,
|
||||
dcomplex* beta,
|
||||
dcomplex* c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
bli_zgemm_ref_mxn( k,
|
||||
@@ -280,21 +279,20 @@ void bli_zgemm_8x8(
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
|
||||
void bli_sgemm_8x8_mt(
|
||||
dim_t k,
|
||||
float* alpha,
|
||||
float* a,
|
||||
float* b,
|
||||
float* beta,
|
||||
float* c, inc_t rs_c, inc_t cs_c,
|
||||
float* a_next, float* b_next,
|
||||
int t_id
|
||||
)
|
||||
dim_t k,
|
||||
float* alpha,
|
||||
float* a,
|
||||
float* b,
|
||||
float* beta,
|
||||
float* c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data,
|
||||
dim_t t_id
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
bli_sgemm_ref_mxn( k,
|
||||
@@ -303,20 +301,19 @@ void bli_sgemm_8x8_mt(
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_cgemm_8x8_mt(
|
||||
dim_t k,
|
||||
scomplex* alpha,
|
||||
scomplex* a,
|
||||
scomplex* b,
|
||||
scomplex* beta,
|
||||
scomplex* c, inc_t rs_c, inc_t cs_c,
|
||||
scomplex* a_next, scomplex* b_next,
|
||||
int t_id
|
||||
)
|
||||
dim_t k,
|
||||
scomplex* alpha,
|
||||
scomplex* a,
|
||||
scomplex* b,
|
||||
scomplex* beta,
|
||||
scomplex* c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data,
|
||||
dim_t t_id
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
bli_cgemm_ref_mxn( k,
|
||||
@@ -325,20 +322,19 @@ void bli_cgemm_8x8_mt(
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_zgemm_8x8_mt(
|
||||
dim_t k,
|
||||
dcomplex* alpha,
|
||||
dcomplex* a,
|
||||
dcomplex* b,
|
||||
dcomplex* beta,
|
||||
dcomplex* c, inc_t rs_c, inc_t cs_c,
|
||||
dcomplex* a_next, dcomplex* b_next,
|
||||
int t_id
|
||||
)
|
||||
dim_t k,
|
||||
dcomplex* alpha,
|
||||
dcomplex* a,
|
||||
dcomplex* b,
|
||||
dcomplex* beta,
|
||||
dcomplex* c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data,
|
||||
dim_t t_id
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
bli_zgemm_ref_mxn( k,
|
||||
@@ -347,6 +343,5 @@ void bli_zgemm_8x8_mt(
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
@@ -39,29 +39,30 @@
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname)( \
|
||||
dim_t k, \
|
||||
ctype* alpha, \
|
||||
ctype* a, \
|
||||
ctype* b, \
|
||||
ctype* beta, \
|
||||
ctype* c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* a_next, ctype* b_next \
|
||||
dim_t k, \
|
||||
ctype* alpha, \
|
||||
ctype* a, \
|
||||
ctype* b, \
|
||||
ctype* beta, \
|
||||
ctype* c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( gemm_8x8 )
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname)( \
|
||||
dim_t k, \
|
||||
ctype* alpha, \
|
||||
ctype* a, \
|
||||
ctype* b, \
|
||||
ctype* beta, \
|
||||
ctype* c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* a_next, ctype* b_next, \
|
||||
dim_t tid \
|
||||
dim_t k, \
|
||||
ctype* alpha, \
|
||||
ctype* a, \
|
||||
ctype* b, \
|
||||
ctype* beta, \
|
||||
ctype* c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data, \
|
||||
dim_t tid \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( gemm_8x8_mt )
|
||||
|
||||
@@ -45,8 +45,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
) \
|
||||
{ \
|
||||
ctype a0; \
|
||||
|
||||
@@ -43,8 +43,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( gemm_ref_4x4 )
|
||||
|
||||
@@ -46,8 +46,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict bT, \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
) \
|
||||
{ \
|
||||
ctype* minus_one = PASTEMAC(ch,m1); \
|
||||
@@ -61,12 +60,12 @@ void PASTEMAC(ch,varname)( \
|
||||
bT, \
|
||||
alpha, \
|
||||
b, rs_b, cs_b, \
|
||||
a_next, \
|
||||
b_next ); \
|
||||
data ); \
|
||||
\
|
||||
PASTEMAC(ch,trsmukr)( a, \
|
||||
b, \
|
||||
c, rs_c, cs_c ); \
|
||||
c, rs_c, cs_c, \
|
||||
data ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC2( gemmtrsm_l_ref_4x4, GEMM_UKERNEL, TRSM_L_UKERNEL )
|
||||
|
||||
@@ -44,8 +44,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict bT, \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( gemmtrsm_l_ref_4x4 )
|
||||
|
||||
@@ -46,8 +46,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict bB, \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
) \
|
||||
{ \
|
||||
ctype* minus_one = PASTEMAC(ch,m1); \
|
||||
@@ -61,11 +60,12 @@ void PASTEMAC(ch,varname)( \
|
||||
bB, \
|
||||
alpha, \
|
||||
b, rs_b, cs_b, \
|
||||
a_next, b_next ); \
|
||||
data ); \
|
||||
\
|
||||
PASTEMAC(ch,trsmukr)( a, \
|
||||
b, \
|
||||
c, rs_c, cs_c ); \
|
||||
c, rs_c, cs_c, \
|
||||
data ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC2( gemmtrsm_u_ref_4x4, GEMM_UKERNEL, TRSM_U_UKERNEL )
|
||||
|
||||
@@ -44,8 +44,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict bB, \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( gemmtrsm_u_ref_4x4 )
|
||||
|
||||
@@ -41,7 +41,8 @@
|
||||
void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict a, \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
) \
|
||||
{ \
|
||||
const dim_t rs_a = 1; \
|
||||
|
||||
@@ -39,7 +39,8 @@
|
||||
void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict a, \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( trsm_l_ref_4x4 )
|
||||
|
||||
@@ -41,7 +41,8 @@
|
||||
void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict a, \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
) \
|
||||
{ \
|
||||
const dim_t rs_a = 1; \
|
||||
|
||||
@@ -39,7 +39,8 @@
|
||||
void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict a, \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( trsm_u_ref_4x4 )
|
||||
|
||||
@@ -41,9 +41,7 @@ void bli_sgemm_opt_d4x4(
|
||||
float* restrict b,
|
||||
float* restrict beta,
|
||||
float* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
float* restrict a_next,
|
||||
float* restrict b_next
|
||||
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -53,8 +51,7 @@ void bli_sgemm_opt_d4x4(
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_dgemm_opt_d4x4(
|
||||
@@ -64,16 +61,11 @@ void bli_dgemm_opt_d4x4(
|
||||
double* restrict b,
|
||||
double* restrict beta,
|
||||
double* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
double* restrict a_next,
|
||||
double* restrict b_next
|
||||
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
dim_t k_iter;
|
||||
dim_t k_left;
|
||||
|
||||
k_iter = k / 4;
|
||||
k_left = k % 4;
|
||||
dim_t k_iter = k / 4;
|
||||
dim_t k_left = k % 4;
|
||||
|
||||
__asm__ volatile
|
||||
(
|
||||
@@ -551,8 +543,7 @@ void bli_cgemm_opt_d4x4(
|
||||
scomplex* restrict b,
|
||||
scomplex* restrict beta,
|
||||
scomplex* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
scomplex* restrict a_next,
|
||||
scomplex* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -562,8 +553,7 @@ void bli_cgemm_opt_d4x4(
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_zgemm_opt_d4x4(
|
||||
@@ -573,8 +563,7 @@ void bli_zgemm_opt_d4x4(
|
||||
dcomplex* restrict b,
|
||||
dcomplex* restrict beta,
|
||||
dcomplex* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
dcomplex* restrict a_next,
|
||||
dcomplex* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -584,7 +573,6 @@ void bli_zgemm_opt_d4x4(
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
|
||||
@@ -43,8 +43,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( gemm_opt_d4x4 )
|
||||
|
||||
@@ -36,15 +36,15 @@
|
||||
#include <assert.h>
|
||||
|
||||
void bli_sgemm_opt_30x8(
|
||||
dim_t k, float* alpha,
|
||||
float* a,
|
||||
float* b,
|
||||
float* beta,
|
||||
float* c, inc_t rs_c, inc_t cs_c,
|
||||
float* a_next,
|
||||
float* b_next,
|
||||
dim_t thread_id
|
||||
)
|
||||
dim_t k,
|
||||
float* alpha,
|
||||
float* a,
|
||||
float* b,
|
||||
float* beta,
|
||||
float* c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data,
|
||||
dim_t thread_id
|
||||
)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -324,15 +324,15 @@ int offsets[16] __attribute__((aligned(0x1000))) = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9
|
||||
//#define MONITORS
|
||||
//#define LOOPMON
|
||||
void bli_dgemm_opt_30x8(
|
||||
dim_t k, double* alpha,
|
||||
double* a,
|
||||
double* b,
|
||||
double* beta,
|
||||
double* c, inc_t rs_c, inc_t cs_c,
|
||||
double* a_next,
|
||||
double* b_next,
|
||||
dim_t thread_id
|
||||
)
|
||||
dim_t k,
|
||||
double* alpha,
|
||||
double* a,
|
||||
double* b,
|
||||
double* beta,
|
||||
double* c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data,
|
||||
dim_t thread_id
|
||||
)
|
||||
{
|
||||
|
||||
int * offsetPtr = &offsets[0];
|
||||
@@ -627,30 +627,29 @@ void bli_dgemm_opt_30x8(
|
||||
}
|
||||
|
||||
|
||||
|
||||
void bli_cgemm_opt_30x8(
|
||||
dim_t k, scomplex* alpha,
|
||||
scomplex* a,
|
||||
scomplex* b,
|
||||
scomplex* beta,
|
||||
scomplex* c, inc_t rs_c, inc_t cs_c,
|
||||
scomplex* a_next,
|
||||
scomplex* b_next,
|
||||
dim_t thread_id
|
||||
)
|
||||
dim_t k,
|
||||
scomplex* alpha,
|
||||
scomplex* a,
|
||||
scomplex* b,
|
||||
scomplex* beta,
|
||||
scomplex* c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data,
|
||||
dim_t thread_id
|
||||
)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
void bli_zgemm_opt_30x8(
|
||||
dim_t k, dcomplex* alpha,
|
||||
dcomplex* a,
|
||||
dcomplex* b,
|
||||
dcomplex* beta,
|
||||
dcomplex* c, inc_t rs_c, inc_t cs_c,
|
||||
dcomplex* a_next,
|
||||
dcomplex* b_next,
|
||||
dim_t thread_id
|
||||
)
|
||||
dim_t k,
|
||||
dcomplex* alpha,
|
||||
dcomplex* a,
|
||||
dcomplex* b,
|
||||
dcomplex* beta,
|
||||
dcomplex* c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data,
|
||||
dim_t thread_id
|
||||
)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -39,14 +39,14 @@
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname)( \
|
||||
dim_t k, \
|
||||
ctype* alpha, \
|
||||
ctype* a, \
|
||||
ctype* b, \
|
||||
ctype* beta, \
|
||||
ctype* c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* a_next, ctype* b_next, \
|
||||
dim_t thread_id \
|
||||
dim_t k, \
|
||||
ctype* alpha, \
|
||||
ctype* a, \
|
||||
ctype* b, \
|
||||
ctype* beta, \
|
||||
ctype* c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data, \
|
||||
dim_t thread_id \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( gemm_opt_30x8 )
|
||||
|
||||
@@ -49,15 +49,14 @@
|
||||
* b is k x nr in packed row-maj format (leading dim is nr)
|
||||
*/
|
||||
void bli_sgemm_opt_8x4(
|
||||
dim_t k,
|
||||
float* restrict alpha,
|
||||
float* restrict a,
|
||||
float* restrict b,
|
||||
float* restrict beta,
|
||||
float* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
float* restrict a_next,
|
||||
float* restrict b_next
|
||||
)
|
||||
dim_t k,
|
||||
float* restrict alpha,
|
||||
float* restrict a,
|
||||
float* restrict b,
|
||||
float* restrict beta,
|
||||
float* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
#if 0 || defined(UTEST)
|
||||
const long MR = BLIS_DEFAULT_MR_S, NR = BLIS_DEFAULT_NR_S;
|
||||
@@ -74,7 +73,7 @@ void bli_sgemm_opt_8x4(
|
||||
}
|
||||
}
|
||||
#else
|
||||
bli_sgemm_ref_mxn(k, alpha, a, b, beta, c, rs_c, cs_c, a_next, b_next);
|
||||
bli_sgemm_ref_mxn(k, alpha, a, b, beta, c, rs_c, cs_c, data);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -88,15 +87,14 @@ void bli_sgemm_opt_8x4(
|
||||
* b is k x nr in packed row-maj format (leading dim is nr)
|
||||
*/
|
||||
void bli_dgemm_opt_8x4(
|
||||
dim_t k,
|
||||
double* restrict alpha,
|
||||
double* restrict a,
|
||||
double* restrict b,
|
||||
double* restrict beta,
|
||||
double* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
double* restrict a_next,
|
||||
double* restrict b_next
|
||||
)
|
||||
dim_t k,
|
||||
double* restrict alpha,
|
||||
double* restrict a,
|
||||
double* restrict b,
|
||||
double* restrict beta,
|
||||
double* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
#if 1
|
||||
if (rs_c == 1) {
|
||||
@@ -447,7 +445,7 @@ void bli_dgemm_opt_8x4(
|
||||
}
|
||||
}
|
||||
#else
|
||||
bli_dgemm_ref_mxn(k, alpha, a, b, beta, c, rs_c, cs_c, a_next, b_next);
|
||||
bli_dgemm_ref_mxn(k, alpha, a, b, beta, c, rs_c, cs_c, data);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@@ -462,14 +460,13 @@ void bli_dgemm_opt_8x4(
|
||||
* b is k x nr in packed row-maj format (leading dim is nr)
|
||||
*/
|
||||
void bli_cgemm_opt_8x4(
|
||||
dim_t k,
|
||||
scomplex* restrict alpha,
|
||||
scomplex* restrict a,
|
||||
scomplex* restrict b,
|
||||
scomplex* restrict beta,
|
||||
scomplex* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
scomplex* restrict a_next,
|
||||
scomplex* restrict b_next
|
||||
dim_t k,
|
||||
scomplex* restrict alpha,
|
||||
scomplex* restrict a,
|
||||
scomplex* restrict b,
|
||||
scomplex* restrict beta,
|
||||
scomplex* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
#if 0 || defined(UTEST)
|
||||
@@ -498,7 +495,7 @@ void bli_cgemm_opt_8x4(
|
||||
}
|
||||
}
|
||||
#else
|
||||
bli_cgemm_ref_mxn(k, alpha, a, b, beta, c, rs_c, cs_c, a_next, b_next);
|
||||
bli_cgemm_ref_mxn(k, alpha, a, b, beta, c, rs_c, cs_c, data);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -512,14 +509,13 @@ void bli_cgemm_opt_8x4(
|
||||
* b is k x nr in packed row-maj format (leading dim is nr)
|
||||
*/
|
||||
void bli_zgemm_opt_8x4(
|
||||
dim_t k,
|
||||
dcomplex* restrict alpha,
|
||||
dcomplex* restrict a,
|
||||
dcomplex* restrict b,
|
||||
dcomplex* restrict beta,
|
||||
dcomplex* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
dcomplex* restrict a_next,
|
||||
dcomplex* restrict b_next
|
||||
dim_t k,
|
||||
dcomplex* restrict alpha,
|
||||
dcomplex* restrict a,
|
||||
dcomplex* restrict b,
|
||||
dcomplex* restrict beta,
|
||||
dcomplex* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
#if 0 || defined(UTEST)
|
||||
@@ -548,7 +544,7 @@ void bli_zgemm_opt_8x4(
|
||||
}
|
||||
}
|
||||
#else
|
||||
bli_zgemm_ref_mxn(k, alpha, a, b, beta, c, rs_c, cs_c, a_next, b_next);
|
||||
bli_zgemm_ref_mxn(k, alpha, a, b, beta, c, rs_c, cs_c, data);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@@ -42,25 +42,23 @@
|
||||
#endif
|
||||
|
||||
void bli_sgemm_opt_8x4(
|
||||
dim_t k,
|
||||
float* restrict alpha,
|
||||
float* restrict a,
|
||||
float* restrict b,
|
||||
float* restrict beta,
|
||||
float* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
float* restrict a_next,
|
||||
float* restrict b_next
|
||||
dim_t k,
|
||||
float* restrict alpha,
|
||||
float* restrict a,
|
||||
float* restrict b,
|
||||
float* restrict beta,
|
||||
float* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
);
|
||||
|
||||
void bli_dgemm_opt_8x4(
|
||||
dim_t k,
|
||||
double* restrict alpha,
|
||||
double* restrict a,
|
||||
double* restrict b,
|
||||
double* restrict beta,
|
||||
double* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
double* restrict a_next,
|
||||
double* restrict b_next
|
||||
dim_t k,
|
||||
double* restrict alpha,
|
||||
double* restrict a,
|
||||
double* restrict b,
|
||||
double* restrict beta,
|
||||
double* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
);
|
||||
|
||||
void bli_cgemm_opt_8x4(
|
||||
@@ -70,8 +68,7 @@ void bli_cgemm_opt_8x4(
|
||||
scomplex* restrict b,
|
||||
scomplex* restrict beta,
|
||||
scomplex* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
scomplex* restrict a_next,
|
||||
scomplex* restrict b_next
|
||||
auxinfo_t* data
|
||||
);
|
||||
|
||||
void bli_zgemm_opt_8x4(
|
||||
@@ -81,8 +78,7 @@ void bli_zgemm_opt_8x4(
|
||||
dcomplex* restrict b,
|
||||
dcomplex* restrict beta,
|
||||
dcomplex* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
dcomplex* restrict a_next,
|
||||
dcomplex* restrict b_next
|
||||
auxinfo_t* data
|
||||
);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -29,15 +29,14 @@
|
||||
* b is k x nr in packed row-maj format (leading dim is nr)
|
||||
*/
|
||||
void bli_dgemm_check(
|
||||
dim_t k,
|
||||
double* restrict alpha,
|
||||
double* restrict a,
|
||||
double* restrict b,
|
||||
double* restrict beta,
|
||||
double* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
double* restrict a_next,
|
||||
double* restrict b_next
|
||||
)
|
||||
dim_t k,
|
||||
double* restrict alpha,
|
||||
double* restrict a,
|
||||
double* restrict b,
|
||||
double* restrict beta,
|
||||
double* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
int i, j, kk;
|
||||
double c00;
|
||||
@@ -100,9 +99,9 @@ int main(int argc, char *argv[])
|
||||
|
||||
/* First check the results */
|
||||
|
||||
bli_dgemm_opt_8x4(k, &alpha, A, B, &beta, C, rs_c, cs_c, 0, 0);
|
||||
bli_dgemm_opt_8x4(k, &alpha, A, B, &beta, C, rs_c, cs_c, NULL);
|
||||
|
||||
bli_dgemm_check(k, &alpha, A, B, &beta, C2, rs_c, cs_c, 0, 0);
|
||||
bli_dgemm_check(k, &alpha, A, B, &beta, C2, rs_c, cs_c, NULL);
|
||||
|
||||
for (i=0, errors=0; i<MR*NR-1; i++) {
|
||||
if (fabs(C[i] - C2[i]) > EPSILON) {
|
||||
@@ -121,7 +120,7 @@ int main(int argc, char *argv[])
|
||||
gettimeofday(&tv_start, NULL);
|
||||
|
||||
for (i=0; i<iters; i++) {
|
||||
bli_dgemm_opt_8x4(k, &alpha, A, B, &beta, C, rs_c, cs_c, 0, 0);
|
||||
bli_dgemm_opt_8x4(k, &alpha, A, B, &beta, C, rs_c, cs_c, NULL);
|
||||
}
|
||||
|
||||
gettimeofday(&tv_end, NULL);
|
||||
|
||||
@@ -39,13 +39,12 @@
|
||||
|
||||
void bli_sgemm_opt_8x4_ref_u4_nodupl_avx1(
|
||||
dim_t k,
|
||||
float* restrict alpha,
|
||||
float* restrict a,
|
||||
float* restrict b,
|
||||
float* restrict beta,
|
||||
float* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
float* restrict a_next,
|
||||
float* restrict b_next
|
||||
float* restrict alpha,
|
||||
float* restrict a,
|
||||
float* restrict b,
|
||||
float* restrict beta,
|
||||
float* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -55,30 +54,28 @@ void bli_sgemm_opt_8x4_ref_u4_nodupl_avx1(
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
|
||||
|
||||
void bli_dgemm_opt_8x4_ref_u4_nodupl_avx1(
|
||||
dim_t k,
|
||||
double* restrict alpha,
|
||||
double* restrict a,
|
||||
double* restrict b,
|
||||
double* restrict beta,
|
||||
double* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
double* restrict a_next,
|
||||
double* restrict b_next
|
||||
dim_t k,
|
||||
double* restrict alpha,
|
||||
double* restrict a,
|
||||
double* restrict b,
|
||||
double* restrict beta,
|
||||
double* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
dim_t i;
|
||||
//void* a_next = bli_auxinfo_next_a( data );
|
||||
void* b_next = bli_auxinfo_next_b( data );
|
||||
|
||||
dim_t k_iter;
|
||||
dim_t k_left;
|
||||
dim_t k_iter = k / 2;
|
||||
dim_t k_left = k % 2;
|
||||
|
||||
k_iter = k / 2;
|
||||
k_left = k % 2;
|
||||
dim_t i;
|
||||
|
||||
double *c00, *c01, *c02, *c03;
|
||||
double *c40, *c41, *c42, *c43;
|
||||
@@ -641,8 +638,7 @@ void bli_cgemm_opt_8x4_ref_u4_nodupl_avx1(
|
||||
scomplex* restrict b,
|
||||
scomplex* restrict beta,
|
||||
scomplex* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
scomplex* restrict a_next,
|
||||
scomplex* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -652,8 +648,7 @@ void bli_cgemm_opt_8x4_ref_u4_nodupl_avx1(
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
|
||||
@@ -665,8 +660,7 @@ void bli_zgemm_opt_8x4_ref_u4_nodupl_avx1(
|
||||
dcomplex* restrict b,
|
||||
dcomplex* restrict beta,
|
||||
dcomplex* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
dcomplex* restrict a_next,
|
||||
dcomplex* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -676,7 +670,6 @@ void bli_zgemm_opt_8x4_ref_u4_nodupl_avx1(
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
|
||||
@@ -43,8 +43,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( gemm_opt_8x4_ref_u4_nodupl_avx1 )
|
||||
|
||||
@@ -41,15 +41,14 @@ void bli_sgemm_opt_d4x4(
|
||||
float* restrict b,
|
||||
float* restrict beta,
|
||||
float* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
float* restrict a_next,
|
||||
float* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
dim_t k_iter;
|
||||
dim_t k_left;
|
||||
void* a_next = bli_auxinfo_next_a( data );
|
||||
void* b_next = bli_auxinfo_next_b( data );
|
||||
|
||||
k_iter = k / 4;
|
||||
k_left = k % 4;
|
||||
dim_t k_iter = k / 4;
|
||||
dim_t k_left = k % 4;
|
||||
|
||||
__asm__ volatile
|
||||
(
|
||||
@@ -723,15 +722,14 @@ void bli_dgemm_opt_d4x4(
|
||||
double* restrict b,
|
||||
double* restrict beta,
|
||||
double* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
double* restrict a_next,
|
||||
double* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
dim_t k_iter;
|
||||
dim_t k_left;
|
||||
void* a_next = bli_auxinfo_next_a( data );
|
||||
void* b_next = bli_auxinfo_next_b( data );
|
||||
|
||||
k_iter = k / 4;
|
||||
k_left = k % 4;
|
||||
dim_t k_iter = k / 4;
|
||||
dim_t k_left = k % 4;
|
||||
|
||||
__asm__ volatile
|
||||
(
|
||||
@@ -1339,8 +1337,7 @@ void bli_cgemm_opt_d4x4(
|
||||
scomplex* restrict b,
|
||||
scomplex* restrict beta,
|
||||
scomplex* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
scomplex* restrict a_next,
|
||||
scomplex* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -1350,8 +1347,7 @@ void bli_cgemm_opt_d4x4(
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_zgemm_opt_d4x4(
|
||||
@@ -1361,8 +1357,7 @@ void bli_zgemm_opt_d4x4(
|
||||
dcomplex* restrict b,
|
||||
dcomplex* restrict beta,
|
||||
dcomplex* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
dcomplex* restrict a_next,
|
||||
dcomplex* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -1372,7 +1367,6 @@ void bli_zgemm_opt_d4x4(
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
|
||||
@@ -43,8 +43,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( gemm_opt_d4x4 )
|
||||
|
||||
@@ -42,8 +42,7 @@ void bli_sgemmtrsm_l_opt_d4x4(
|
||||
float* restrict b01,
|
||||
float* restrict b11,
|
||||
float* restrict c11, inc_t rs_c, inc_t cs_c,
|
||||
float* restrict a_next,
|
||||
float* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -54,8 +53,7 @@ void bli_sgemmtrsm_l_opt_d4x4(
|
||||
b01,
|
||||
b11,
|
||||
c11, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_dgemmtrsm_l_opt_d4x4(
|
||||
@@ -66,15 +64,13 @@ void bli_dgemmtrsm_l_opt_d4x4(
|
||||
double* restrict b01,
|
||||
double* restrict b11,
|
||||
double* restrict c11, inc_t rs_c, inc_t cs_c,
|
||||
double* restrict a_next,
|
||||
double* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
dim_t k_iter;
|
||||
dim_t k_left;
|
||||
void* b_next = bli_auxinfo_next_b( data );
|
||||
|
||||
k_iter = k / 4;
|
||||
k_left = k % 4;
|
||||
dim_t k_iter = k / 4;
|
||||
dim_t k_left = k % 4;
|
||||
|
||||
__asm__ volatile
|
||||
(
|
||||
@@ -551,8 +547,7 @@ void bli_cgemmtrsm_l_opt_d4x4(
|
||||
scomplex* restrict b01,
|
||||
scomplex* restrict b11,
|
||||
scomplex* restrict c11, inc_t rs_c, inc_t cs_c,
|
||||
scomplex* restrict a_next,
|
||||
scomplex* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -563,8 +558,7 @@ void bli_cgemmtrsm_l_opt_d4x4(
|
||||
b01,
|
||||
b11,
|
||||
c11, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_zgemmtrsm_l_opt_d4x4(
|
||||
@@ -575,8 +569,7 @@ void bli_zgemmtrsm_l_opt_d4x4(
|
||||
dcomplex* restrict b01,
|
||||
dcomplex* restrict b11,
|
||||
dcomplex* restrict c11, inc_t rs_c, inc_t cs_c,
|
||||
dcomplex* restrict a_next,
|
||||
dcomplex* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -587,7 +580,6 @@ void bli_zgemmtrsm_l_opt_d4x4(
|
||||
b01,
|
||||
b11,
|
||||
c11, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
|
||||
@@ -44,8 +44,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict b01, \
|
||||
ctype* restrict b11, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( gemmtrsm_l_opt_d4x4 )
|
||||
|
||||
@@ -42,8 +42,7 @@ void bli_sgemmtrsm_u_opt_d4x4(
|
||||
float* restrict b21,
|
||||
float* restrict b11,
|
||||
float* restrict c11, inc_t rs_c, inc_t cs_c,
|
||||
float* restrict a_next,
|
||||
float* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -54,8 +53,7 @@ void bli_sgemmtrsm_u_opt_d4x4(
|
||||
b21,
|
||||
b11,
|
||||
c11, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_dgemmtrsm_u_opt_d4x4(
|
||||
@@ -66,15 +64,14 @@ void bli_dgemmtrsm_u_opt_d4x4(
|
||||
double* restrict b21,
|
||||
double* restrict b11,
|
||||
double* restrict c11, inc_t rs_c, inc_t cs_c,
|
||||
double* restrict a_next,
|
||||
double* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
dim_t k_iter;
|
||||
dim_t k_left;
|
||||
void* b_next = bli_auxinfo_next_b( data );
|
||||
|
||||
dim_t k_iter = k / 4;
|
||||
dim_t k_left = k % 4;
|
||||
|
||||
k_iter = k / 4;
|
||||
k_left = k % 4;
|
||||
|
||||
__asm__ volatile
|
||||
(
|
||||
@@ -537,8 +534,7 @@ void bli_cgemmtrsm_u_opt_d4x4(
|
||||
scomplex* restrict b21,
|
||||
scomplex* restrict b11,
|
||||
scomplex* restrict c11, inc_t rs_c, inc_t cs_c,
|
||||
scomplex* restrict a_next,
|
||||
scomplex* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -549,8 +545,7 @@ void bli_cgemmtrsm_u_opt_d4x4(
|
||||
b21,
|
||||
b11,
|
||||
c11, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_zgemmtrsm_u_opt_d4x4(
|
||||
@@ -561,8 +556,7 @@ void bli_zgemmtrsm_u_opt_d4x4(
|
||||
dcomplex* restrict b21,
|
||||
dcomplex* restrict b11,
|
||||
dcomplex* restrict c11, inc_t rs_c, inc_t cs_c,
|
||||
dcomplex* restrict a_next,
|
||||
dcomplex* restrict b_next
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -573,7 +567,6 @@ void bli_zgemmtrsm_u_opt_d4x4(
|
||||
b21,
|
||||
b11,
|
||||
c11, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
|
||||
@@ -44,8 +44,7 @@ void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict b21, \
|
||||
ctype* restrict b11, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* restrict a_next, \
|
||||
ctype* restrict b_next \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( gemmtrsm_u_opt_d4x4 )
|
||||
|
||||
@@ -37,19 +37,22 @@
|
||||
void bli_strsm_l_opt_d4x4(
|
||||
float* restrict a11,
|
||||
float* restrict b11,
|
||||
float* restrict c11, inc_t rs_c, inc_t cs_c
|
||||
float* restrict c11, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
bli_strsm_l_ref_mxn( a11,
|
||||
b11,
|
||||
c11, rs_c, cs_c );
|
||||
c11, rs_c, cs_c,
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_dtrsm_l_opt_d4x4(
|
||||
double* restrict a11,
|
||||
double* restrict b11,
|
||||
double* restrict c11, inc_t rs_c, inc_t cs_c
|
||||
double* restrict c11, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
__asm__ volatile
|
||||
@@ -208,24 +211,28 @@ void bli_dtrsm_l_opt_d4x4(
|
||||
void bli_ctrsm_l_opt_d4x4(
|
||||
scomplex* restrict a11,
|
||||
scomplex* restrict b11,
|
||||
scomplex* restrict c11, inc_t rs_c, inc_t cs_c
|
||||
scomplex* restrict c11, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
bli_ctrsm_l_ref_mxn( a11,
|
||||
b11,
|
||||
c11, rs_c, cs_c );
|
||||
c11, rs_c, cs_c,
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_ztrsm_l_opt_d4x4(
|
||||
dcomplex* restrict a11,
|
||||
dcomplex* restrict b11,
|
||||
dcomplex* restrict c11, inc_t rs_c, inc_t cs_c
|
||||
dcomplex* restrict c11, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
bli_ztrsm_l_ref_mxn( a11,
|
||||
b11,
|
||||
c11, rs_c, cs_c );
|
||||
c11, rs_c, cs_c,
|
||||
data );
|
||||
}
|
||||
|
||||
|
||||
@@ -39,7 +39,8 @@
|
||||
void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict a11, \
|
||||
ctype* restrict b11, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( trsm_l_opt_d4x4 )
|
||||
|
||||
@@ -37,19 +37,22 @@
|
||||
void bli_strsm_u_opt_d4x4(
|
||||
float* restrict a11,
|
||||
float* restrict b11,
|
||||
float* restrict c11, inc_t rs_c, inc_t cs_c
|
||||
float* restrict c11, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
bli_strsm_u_ref_mxn( a11,
|
||||
b11,
|
||||
c11, rs_c, cs_c );
|
||||
c11, rs_c, cs_c,
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_dtrsm_u_opt_d4x4(
|
||||
double* restrict a11,
|
||||
double* restrict b11,
|
||||
double* restrict c11, inc_t rs_c, inc_t cs_c
|
||||
double* restrict c11, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
__asm__ volatile
|
||||
@@ -211,24 +214,28 @@ void bli_dtrsm_u_opt_d4x4(
|
||||
void bli_ctrsm_u_opt_d4x4(
|
||||
scomplex* restrict a11,
|
||||
scomplex* restrict b11,
|
||||
scomplex* restrict c11, inc_t rs_c, inc_t cs_c
|
||||
scomplex* restrict c11, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
bli_ctrsm_u_ref_mxn( a11,
|
||||
b11,
|
||||
c11, rs_c, cs_c );
|
||||
c11, rs_c, cs_c,
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_ztrsm_u_opt_d4x4(
|
||||
dcomplex* restrict a11,
|
||||
dcomplex* restrict b11,
|
||||
dcomplex* restrict c11, inc_t rs_c, inc_t cs_c
|
||||
dcomplex* restrict c11, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
bli_ztrsm_u_ref_mxn( a11,
|
||||
b11,
|
||||
c11, rs_c, cs_c );
|
||||
c11, rs_c, cs_c,
|
||||
data );
|
||||
}
|
||||
|
||||
|
||||
@@ -39,7 +39,8 @@
|
||||
void PASTEMAC(ch,varname)( \
|
||||
ctype* restrict a11, \
|
||||
ctype* restrict b11, \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c \
|
||||
ctype* restrict c11, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( trsm_u_opt_d4x4 )
|
||||
|
||||
@@ -36,14 +36,14 @@
|
||||
|
||||
|
||||
|
||||
void bli_sgemm_4x6(
|
||||
dim_t k,
|
||||
float* alpha,
|
||||
float* a,
|
||||
float* b,
|
||||
float* beta,
|
||||
float* c, inc_t rs_c, inc_t cs_c,
|
||||
float* a_next, float* b_next
|
||||
void bli_sgemm_4x6(
|
||||
dim_t k,
|
||||
float* restrict alpha,
|
||||
float* restrict a,
|
||||
float* restrict b,
|
||||
float* restrict beta,
|
||||
float* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -53,24 +53,21 @@
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_dgemm_4x6(
|
||||
dim_t k,
|
||||
double* alpha,
|
||||
double* a,
|
||||
double* b,
|
||||
double* beta,
|
||||
double* c, inc_t rs_c, inc_t cs_c,
|
||||
double* a_next, double* b_next
|
||||
void bli_dgemm_4x6(
|
||||
dim_t k,
|
||||
double* restrict alpha,
|
||||
double* restrict a,
|
||||
double* restrict b,
|
||||
double* restrict beta,
|
||||
double* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
dim_t k_iter;
|
||||
dim_t k_left;
|
||||
k_iter = k / 16;
|
||||
k_left = k % 16;
|
||||
dim_t k_iter = k / 16;
|
||||
dim_t k_left = k % 16;
|
||||
|
||||
__asm__
|
||||
(
|
||||
@@ -674,14 +671,14 @@
|
||||
);
|
||||
}
|
||||
|
||||
void bli_cgemm_4x6(
|
||||
dim_t k,
|
||||
scomplex* alpha,
|
||||
scomplex* a,
|
||||
scomplex* b,
|
||||
scomplex* beta,
|
||||
scomplex* c, inc_t rs_c, inc_t cs_c,
|
||||
scomplex* a_next, scomplex* b_next
|
||||
void bli_cgemm_4x6(
|
||||
dim_t k,
|
||||
scomplex* restrict alpha,
|
||||
scomplex* restrict a,
|
||||
scomplex* restrict b,
|
||||
scomplex* restrict beta,
|
||||
scomplex* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -691,18 +688,17 @@
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
void bli_zgemm_4x6(
|
||||
dim_t k,
|
||||
dcomplex* alpha,
|
||||
dcomplex* a,
|
||||
dcomplex* b,
|
||||
dcomplex* beta,
|
||||
dcomplex* c, inc_t rs_c, inc_t cs_c,
|
||||
dcomplex* a_next, dcomplex* b_next
|
||||
void bli_zgemm_4x6(
|
||||
dim_t k,
|
||||
dcomplex* restrict alpha,
|
||||
dcomplex* restrict a,
|
||||
dcomplex* restrict b,
|
||||
dcomplex* restrict beta,
|
||||
dcomplex* restrict c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
)
|
||||
{
|
||||
/* Just call the reference implementation. */
|
||||
@@ -712,7 +708,6 @@
|
||||
b,
|
||||
beta,
|
||||
c, rs_c, cs_c,
|
||||
a_next,
|
||||
b_next );
|
||||
data );
|
||||
}
|
||||
|
||||
|
||||
@@ -39,13 +39,13 @@
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname)( \
|
||||
dim_t k, \
|
||||
ctype* alpha, \
|
||||
ctype* a, \
|
||||
ctype* b, \
|
||||
ctype* beta, \
|
||||
ctype* c, inc_t rs_c, inc_t cs_c, \
|
||||
ctype* a_next, ctype* b_next \
|
||||
dim_t k, \
|
||||
ctype* restrict alpha, \
|
||||
ctype* restrict a, \
|
||||
ctype* restrict b, \
|
||||
ctype* restrict beta, \
|
||||
ctype* restrict c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( gemm_4x6 )
|
||||
|
||||
@@ -119,7 +119,7 @@ endif
|
||||
|
||||
# BLIS library and header path. This is simply wherever it was installed.
|
||||
BLIS_LIB_PATH := $(INSTALL_PREFIX)/lib
|
||||
BLIS_INC_PATH := $(INSTALL_PREFIX)/include
|
||||
BLIS_INC_PATH := $(INSTALL_PREFIX)/include/blis
|
||||
|
||||
# BLIS library.
|
||||
BLIS_LIB := $(BLIS_LIB_PATH)/libblis.a
|
||||
@@ -174,10 +174,8 @@ TEST_SIZES_SRC := test_size.c
|
||||
CFLAGS += -I$(BLIS_INC_PATH) -I$(TEST_SRC_PATH)
|
||||
|
||||
LINKER := $(CC)
|
||||
LDFLAGS := -L/usr/lib/gcc/x86_64-redhat-linux/4.1.2 -L/usr/lib/gcc/x86_64-redhat-linux/4.1.2/../../../../lib64 -L/lib/../lib64 -L/usr/lib/../lib64 -lgfortranbegin -lgfortran -lm
|
||||
LDFLAGS += -lpthread
|
||||
#LDFLAGS := -L/usr/lib/gcc/i486-linux-gnu/4.4.3 -L/usr/lib/gcc/i486-linux-gnu/4.4.3/../../../../lib -L/lib/../lib -L/usr/lib/../lib -L/usr/lib/gcc/i486-linux-gnu/4.4.3/../../.. -L/usr/lib/i486-linux-gnu -lgfortranbegin -lgfortran -lm
|
||||
|
||||
LDFLAGS := -L/home/00146/field/gnu/gcc-4.8.2/lib64
|
||||
LDFLAGS += -lgfortran -lm -lpthread
|
||||
|
||||
#
|
||||
# --- Targets/rules ------------------------------------------------------------
|
||||
|
||||
@@ -430,7 +430,7 @@ void PASTEMAC(ch,varname)( \
|
||||
b, \
|
||||
beta, \
|
||||
c, rs_c, cs_c, \
|
||||
a, b ); \
|
||||
NULL ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( gemm_ukr, GEMM_UKERNEL )
|
||||
|
||||
@@ -485,15 +485,14 @@ void bli_gemmtrsm_ukr_make_subparts( dim_t k,
|
||||
#define FUNCPTR_T gemmtrsm_ukr_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)(
|
||||
dim_t k,
|
||||
void* alpha,
|
||||
void* a1x,
|
||||
void* a11,
|
||||
void* bx1,
|
||||
void* b11,
|
||||
void* c11, inc_t rs_c, inc_t cs_c,
|
||||
void* a_next,
|
||||
void* b_next
|
||||
dim_t k,
|
||||
void* alpha,
|
||||
void* a1x,
|
||||
void* a11,
|
||||
void* bx1,
|
||||
void* b11,
|
||||
void* c11, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
);
|
||||
|
||||
static FUNCPTR_T GENARRAY(ftypes_l,gemmtrsm_l_ukr);
|
||||
@@ -515,7 +514,7 @@ void bli_gemmtrsm_ukr( obj_t* alpha,
|
||||
|
||||
void* buf_a11 = bli_obj_buffer_at_off( *a11 );
|
||||
|
||||
void* buf_bx1 = bli_obj_buffer_at_off( *bx1 );
|
||||
void* buf_bx1 = bli_obj_buffer_at_off( *bx1 );
|
||||
|
||||
void* buf_b11 = bli_obj_buffer_at_off( *b11 );
|
||||
|
||||
@@ -527,6 +526,17 @@ void bli_gemmtrsm_ukr( obj_t* alpha,
|
||||
|
||||
FUNCPTR_T f;
|
||||
|
||||
auxinfo_t data;
|
||||
|
||||
|
||||
// Fill the auxinfo_t struct in case the micro-kernel uses it.
|
||||
if ( bli_obj_is_lower( *a11 ) ) { bli_auxinfo_set_next_a( buf_a1x, data ); }
|
||||
else { bli_auxinfo_set_next_a( buf_a11, data ); }
|
||||
bli_auxinfo_set_next_b( buf_bx1, data );
|
||||
|
||||
// STILL NEED TO FILL IN PANEL STRIDE FIELDS!
|
||||
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
if ( bli_obj_is_lower( *a11 ) ) f = ftypes_l[dt];
|
||||
@@ -540,8 +550,7 @@ void bli_gemmtrsm_ukr( obj_t* alpha,
|
||||
buf_bx1,
|
||||
buf_b11,
|
||||
buf_c11, rs_c, cs_c,
|
||||
buf_a1x,
|
||||
buf_bx1 );
|
||||
&data );
|
||||
}
|
||||
|
||||
|
||||
@@ -549,15 +558,14 @@ void bli_gemmtrsm_ukr( obj_t* alpha,
|
||||
#define GENTFUNC( ctype, ch, varname, ukrname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname)( \
|
||||
dim_t k, \
|
||||
void* alpha, \
|
||||
void* a1x, \
|
||||
void* a11, \
|
||||
void* bx1, \
|
||||
void* b11, \
|
||||
void* c11, inc_t rs_c, inc_t cs_c, \
|
||||
void* a_next, \
|
||||
void* b_next \
|
||||
dim_t k, \
|
||||
void* alpha, \
|
||||
void* a1x, \
|
||||
void* a11, \
|
||||
void* bx1, \
|
||||
void* b11, \
|
||||
void* c11, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
) \
|
||||
{ \
|
||||
PASTEMAC(ch,ukrname)( k, \
|
||||
@@ -567,8 +575,7 @@ void PASTEMAC(ch,varname)( \
|
||||
bx1, \
|
||||
b11, \
|
||||
c11, rs_c, cs_c, \
|
||||
a_next, \
|
||||
b_next ); \
|
||||
data ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( gemmtrsm_l_ukr, GEMMTRSM_L_UKERNEL )
|
||||
|
||||
@@ -48,15 +48,14 @@ void bli_gemmtrsm_ukr( obj_t* alpha,
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname)( \
|
||||
dim_t k, \
|
||||
void* alpha, \
|
||||
void* a1x, \
|
||||
void* a11, \
|
||||
void* bx1, \
|
||||
void* b11, \
|
||||
void* c11, inc_t rs_c, inc_t cs_c, \
|
||||
void* a_next, \
|
||||
void* b_next \
|
||||
dim_t k, \
|
||||
void* alpha, \
|
||||
void* a1x, \
|
||||
void* a11, \
|
||||
void* bx1, \
|
||||
void* b11, \
|
||||
void* c11, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( gemmtrsm_l_ukr )
|
||||
|
||||
@@ -377,9 +377,10 @@ void libblis_test_trsm_ukr_check( side_t side,
|
||||
#define FUNCPTR_T trsm_ukr_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)(
|
||||
void* a,
|
||||
void* b,
|
||||
void* c, inc_t rs_c, inc_t cs_c
|
||||
void* a,
|
||||
void* b,
|
||||
void* c, inc_t rs_c, inc_t cs_c,
|
||||
auxinfo_t* data
|
||||
);
|
||||
|
||||
static FUNCPTR_T GENARRAY(ftypes_l,trsm_l_ukr);
|
||||
@@ -402,6 +403,16 @@ void bli_trsm_ukr( obj_t* a,
|
||||
|
||||
FUNCPTR_T f;
|
||||
|
||||
auxinfo_t data;
|
||||
|
||||
|
||||
// Fill the auxinfo_t struct in case the micro-kernel uses it.
|
||||
bli_auxinfo_set_next_a( buf_a, data );
|
||||
bli_auxinfo_set_next_b( buf_b, data );
|
||||
|
||||
// STILL NEED TO FILL IN PANEL STRIDE FIELDS!
|
||||
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
if ( bli_obj_is_lower( *a ) ) f = ftypes_l[dt];
|
||||
@@ -410,7 +421,8 @@ void bli_trsm_ukr( obj_t* a,
|
||||
// Invoke the function.
|
||||
f( buf_a,
|
||||
buf_b,
|
||||
buf_c, rs_c, cs_c );
|
||||
buf_c, rs_c, cs_c,
|
||||
&data );
|
||||
}
|
||||
|
||||
|
||||
@@ -418,14 +430,16 @@ void bli_trsm_ukr( obj_t* a,
|
||||
#define GENTFUNC( ctype, ch, varname, ukrname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname)( \
|
||||
void* a, \
|
||||
void* b, \
|
||||
void* c, inc_t rs_c, inc_t cs_c \
|
||||
void* a, \
|
||||
void* b, \
|
||||
void* c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
) \
|
||||
{ \
|
||||
PASTEMAC(ch,ukrname)( a, \
|
||||
b, \
|
||||
c, rs_c, cs_c ); \
|
||||
c, rs_c, cs_c, \
|
||||
data ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( trsm_l_ukr, TRSM_L_UKERNEL )
|
||||
|
||||
@@ -45,9 +45,10 @@ void bli_trsm_ukr( obj_t* a,
|
||||
#define GENTPROT( ctype, ch, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname)( \
|
||||
void* a, \
|
||||
void* b, \
|
||||
void* c, inc_t rs_c, inc_t cs_c \
|
||||
void* a, \
|
||||
void* b, \
|
||||
void* c, inc_t rs_c, inc_t cs_c, \
|
||||
auxinfo_t* data \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( trsm_l_ukr )
|
||||
|
||||
Reference in New Issue
Block a user