Fixed outdated fusing factor macros in 1f kernels.

Details:
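- Updated the level-1f kernels for x86_64 and bgq to use the renamed fusing
  factor macros (the per-operation `*_fuse_fac` names are now `*_fusefac`,
  and `BLIS_DEFAULT_FUSING_FACTOR_*` is now `BLIS_DEFAULT_FUSE_FAC_*`).
  These changes were meant to be included in 5e54f46c. Thanks to Fran for
  pointing this out.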
This commit is contained in:
Field G. Van Zee
2013-10-10 11:27:27 -05:00
parent 73aa1e9f31
commit 661d5120cd
11 changed files with 86 additions and 106 deletions

View File

@@ -174,15 +174,15 @@ void bli_dddaxpyf_opt_var1(
if ( bli_zero_dim2( m, b_n ) ) return;
bool_t use_ref = FALSE;
// printf("%d\t%d\t%d\t%d\t%d\t%d\t%d\n", b_n, PASTEMAC(d, axpyf_fuse_fac), inca, incx, incy, bli_is_unaligned_to(a, 32), bli_is_unaligned_to( y, 32));
// printf("%d\t%d\t%d\t%d\t%d\t%d\t%d\n", b_n, PASTEMAC(d, axpyf_fusefac), inca, incx, incy, bli_is_unaligned_to(a, 32), bli_is_unaligned_to( y, 32));
// If there is anything that would interfere with our use of aligned
// vector loads/stores, call the reference implementation.
if ( b_n < PASTEMAC(d,axpyf_fuse_fac) || inca != 1 || incx != 1 || incy != 1 || bli_is_unaligned_to( a, 32 ) || bli_is_unaligned_to( y, 32 ) )
if ( b_n < PASTEMAC(d,axpyf_fusefac) || inca != 1 || incx != 1 || incy != 1 || bli_is_unaligned_to( a, 32 ) || bli_is_unaligned_to( y, 32 ) )
use_ref = TRUE;
// Call the reference implementation if needed.
if ( use_ref == TRUE )
{
// printf("%d\t%d\t%d\t%d\t%d\t%d\n", PASTEMAC(d, axpyf_fuse_fac), inca, incx, incy, bli_is_unaligned_to(a, 32), bli_is_unaligned_to( y, 32));
// printf("%d\t%d\t%d\t%d\t%d\t%d\n", PASTEMAC(d, axpyf_fusefac), inca, incx, incy, bli_is_unaligned_to(a, 32), bli_is_unaligned_to( y, 32));
// printf("DEFAULTING TO REFERENCE IMPLEMENTATION\n");
PASTEMAC3(d,d,d,axpyf_unb_var1)( conja, conjx, m, b_n, alpha_cast, a_cast, inca, lda, x_cast, incx, y_cast, incy );
return;

View File

@@ -26,39 +26,28 @@
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
void bli_axpyf_opt_var1( obj_t* alpha,
obj_t* x,
obj_t* y );
*/
//
// Define fusing factors.
// Prototype axpyf kernel interfaces.
//
#define bli_saxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_S
#define bli_daxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_D
#define bli_caxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_C
#define bli_zaxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_Z
#undef GENTPROT3U12
#define GENTPROT3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, varname ) \
\
void PASTEMAC3(cha,chx,chy,varname)( \
conj_t conja, \
conj_t conjx, \
dim_t m, \
dim_t n, \
void* alpha, \
void* a, inc_t inca, inc_t lda, \
void* x, inc_t incx, \
void* y, inc_t incy \
conj_t conja, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype_ax* restrict alpha, \
ctype_a* restrict a, inc_t inca, inc_t lda, \
ctype_x* restrict x, inc_t incx, \
ctype_y* restrict y, inc_t incy \
);
INSERT_GENTPROT3U12_BASIC( axpyf_opt_var1 )

View File

@@ -208,7 +208,7 @@ void bli_dddaxpyf_opt_var1(
// If there is anything that would interfere with our use of aligned
// vector loads/stores, call the reference implementation.
if ( b_n < PASTEMAC(d,axpyf_fuse_fac) )
if ( b_n < PASTEMAC(d,axpyf_fusefac) )
{
use_ref = TRUE;
}

View File

@@ -201,7 +201,7 @@ void bli_dddaxpyf_opt_var1(
if ( bli_zero_dim2( m, b_n ) ) return;
if ( b_n < PASTEMAC(d,axpyf_fuse_fac) )
if ( b_n < PASTEMAC(d,axpyf_fusefac) )
{
PASTEMAC3(d,d,d,axpyf_unb_var1)( conja,
conjx,

View File

@@ -26,39 +26,28 @@
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
void bli_axpyf_opt_var1( obj_t* alpha,
obj_t* x,
obj_t* y );
*/
//
// Define fusing factors.
// Prototype axpyf kernel interfaces.
//
#define bli_saxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_S
#define bli_daxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_D
#define bli_caxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_C
#define bli_zaxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_Z
#undef GENTPROT3U12
#define GENTPROT3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, varname ) \
\
void PASTEMAC3(cha,chx,chy,varname)( \
conj_t conja, \
conj_t conjx, \
dim_t m, \
dim_t n, \
void* alpha, \
void* a, inc_t inca, inc_t lda, \
void* x, inc_t incx, \
void* y, inc_t incy \
conj_t conja, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype_ax* restrict alpha, \
ctype_a* restrict a, inc_t inca, inc_t lda, \
ctype_x* restrict x, inc_t incx, \
ctype_y* restrict y, inc_t incy \
);
INSERT_GENTPROT3U12_BASIC( axpyf_opt_var1 )

View File

@@ -183,7 +183,7 @@ void bli_ddddotxaxpyf_opt_var1(
// If there is anything that would interfere with our use of aligned
// vector loads/stores, call the reference implementation.
if ( b_n < PASTEMAC(d,dotxaxpyf_fuse_fac) )
if ( b_n < PASTEMAC(d,dotxaxpyf_fusefac) )
{
use_ref = TRUE;
}

View File

@@ -26,37 +26,30 @@
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Define fusing factors.
#define bli_sdotxaxpyf_fuse_fac ( BLIS_DEFAULT_FUSING_FACTOR_S )
#define bli_ddotxaxpyf_fuse_fac ( BLIS_DEFAULT_FUSING_FACTOR_D )
#define bli_cdotxaxpyf_fuse_fac ( BLIS_DEFAULT_FUSING_FACTOR_C / 2 )
#define bli_zdotxaxpyf_fuse_fac ( BLIS_DEFAULT_FUSING_FACTOR_Z / 2 )
#undef GENTPROT3U12
#define GENTPROT3U12( ctype_a, ctype_b, ctype_c, ctype_ab, cha, chb, chc, chab, varname ) \
\
void PASTEMAC3(cha,chb,chc,varname)( \
conj_t conjat, \
conj_t conja, \
conj_t conjw, \
conj_t conjx, \
dim_t m, \
dim_t n, \
void* alpha, \
void* a, inc_t inca, inc_t lda, \
void* w, inc_t incw, \
void* x, inc_t incx, \
void* beta, \
void* y, inc_t incy, \
void* z, inc_t incz \
conj_t conjat, \
conj_t conja, \
conj_t conjw, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype_ab* restrict alpha, \
ctype_a* restrict a, inc_t inca, inc_t lda, \
ctype_b* restrict w, inc_t incw, \
ctype_b* restrict x, inc_t incx, \
ctype_c* restrict beta, \
ctype_c* restrict y, inc_t incy, \
ctype_c* restrict z, inc_t incz \
);
INSERT_GENTPROT3U12_BASIC( dotxaxpyf_opt_var1 )

View File

@@ -158,7 +158,7 @@ void bli_ddddotxf_opt_var1(
// If there is anything that would interfere with our use of aligned
// vector loads/stores, call the reference implementation.
if ( b_n < PASTEMAC(d,dotxf_fuse_fac) )
if ( b_n < PASTEMAC(d,dotxf_fusefac) )
{
use_ref = TRUE;
}

View File

@@ -231,7 +231,7 @@ void bli_ddddotxf_opt_var1(
return;
}
if ( b_m < PASTEMAC(d,dotxf_fuse_fac) )
if ( b_m < PASTEMAC(d,dotxf_fusefac) )
{
PASTEMAC3(d,d,d,dotxf_unb_var1)( conjx,
conjy,

View File

@@ -26,7 +26,7 @@
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -34,27 +34,21 @@
//
// Define fusing factors for dotxf operation.
// Prototype dotxf kernel interfaces.
//
#define bli_sdotxf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_S
#define bli_ddotxf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_D
#define bli_cdotxf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_C
#define bli_zdotxf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_Z
#undef GENTPROT3U12
#define GENTPROT3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, varname ) \
\
void PASTEMAC3(chx,chy,chr,varname)( \
conj_t conjat, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
void* alpha, \
void* a, inc_t inca, inc_t lda, \
void* x, inc_t incx, \
void* beta, \
void* y, inc_t incy \
void PASTEMAC3(cha,chx,chy,varname)( \
conj_t conjat, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
ctype_ax* restrict alpha, \
ctype_a* restrict a, inc_t inca, inc_t lda, \
ctype_x* restrict x, inc_t incx, \
ctype_y* restrict beta, \
ctype_y* restrict y, inc_t incy \
);
INSERT_GENTPROT3U12_BASIC( dotxf_opt_var1 )

View File

@@ -54,21 +54,21 @@
// (b) NR (for triangular operations such as trmm and trsm).
//
#define BLIS_DEFAULT_MC_S 256
#define BLIS_DEFAULT_KC_S 256
#define BLIS_DEFAULT_NC_S 8192
#define BLIS_DEFAULT_MC_S 64
#define BLIS_DEFAULT_KC_S 128
#define BLIS_DEFAULT_NC_S 4096
#define BLIS_DEFAULT_MC_D 128
#define BLIS_DEFAULT_KC_D 256
#define BLIS_DEFAULT_MC_D 64
#define BLIS_DEFAULT_KC_D 128
#define BLIS_DEFAULT_NC_D 4096
#define BLIS_DEFAULT_MC_C 128
#define BLIS_DEFAULT_KC_C 256
#define BLIS_DEFAULT_MC_C 64
#define BLIS_DEFAULT_KC_C 128
#define BLIS_DEFAULT_NC_C 4096
#define BLIS_DEFAULT_MC_Z 64
#define BLIS_DEFAULT_KC_Z 256
#define BLIS_DEFAULT_NC_Z 2048
#define BLIS_DEFAULT_KC_Z 128
#define BLIS_DEFAULT_NC_Z 4096
// -- Cache blocksize extensions (for optimizing edge cases) --
@@ -100,16 +100,16 @@
// in the m and n dimensions should all be equal to the size expected by
// the reference micro-kernel(s).
#define BLIS_DEFAULT_MR_S 8
#define BLIS_DEFAULT_MR_S 4
#define BLIS_DEFAULT_NR_S 4
#define BLIS_DEFAULT_MR_D 8
#define BLIS_DEFAULT_MR_D 4
#define BLIS_DEFAULT_NR_D 4
#define BLIS_DEFAULT_MR_C 8
#define BLIS_DEFAULT_MR_C 4
#define BLIS_DEFAULT_NR_C 4
#define BLIS_DEFAULT_MR_Z 8
#define BLIS_DEFAULT_MR_Z 4
#define BLIS_DEFAULT_NR_Z 4
// NOTE: If the micro-kernel, which is typically unrolled to a factor
@@ -220,10 +220,25 @@
// of level-1f operations. They are here only for use when these operations
// are optimized.
#define BLIS_DEFAULT_FUSING_FACTOR_S 8
#define BLIS_DEFAULT_FUSING_FACTOR_D 4
#define BLIS_DEFAULT_FUSING_FACTOR_C 4
#define BLIS_DEFAULT_FUSING_FACTOR_Z 2
#define BLIS_DEFAULT_FUSE_FAC_S 8
#define BLIS_DEFAULT_FUSE_FAC_D 4
#define BLIS_DEFAULT_FUSE_FAC_C 4
#define BLIS_DEFAULT_FUSE_FAC_Z 2
#define BLIS_AXPYF_FUSE_FAC_S BLIS_DEFAULT_FUSE_FAC_S
#define BLIS_AXPYF_FUSE_FAC_D BLIS_DEFAULT_FUSE_FAC_D
#define BLIS_AXPYF_FUSE_FAC_C BLIS_DEFAULT_FUSE_FAC_C
#define BLIS_AXPYF_FUSE_FAC_Z BLIS_DEFAULT_FUSE_FAC_Z
#define BLIS_DOTXF_FUSE_FAC_S BLIS_DEFAULT_FUSE_FAC_S
#define BLIS_DOTXF_FUSE_FAC_D BLIS_DEFAULT_FUSE_FAC_D
#define BLIS_DOTXF_FUSE_FAC_C BLIS_DEFAULT_FUSE_FAC_C
#define BLIS_DOTXF_FUSE_FAC_Z BLIS_DEFAULT_FUSE_FAC_Z
#define BLIS_DOTXAXPYF_FUSE_FAC_S BLIS_DEFAULT_FUSE_FAC_S
#define BLIS_DOTXAXPYF_FUSE_FAC_D BLIS_DEFAULT_FUSE_FAC_D
#define BLIS_DOTXAXPYF_FUSE_FAC_C BLIS_DEFAULT_FUSE_FAC_C
#define BLIS_DOTXAXPYF_FUSE_FAC_Z BLIS_DEFAULT_FUSE_FAC_Z