Added Apple Firestorm (A14/M1) Subconfig

- Use the same bulk kernel as Cortex-A53 / ThunderX2;
- Larger block size;
- Use gemmsup kernels for double precision.
This commit is contained in:
RuQing Xu
2021-08-13 03:21:19 +09:00
parent 3df0e9b653
commit e38ca28689
8 changed files with 319 additions and 0 deletions

View File

@@ -0,0 +1,145 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
void bli_cntx_init_firestorm( cntx_t* cntx )
{
blksz_t blkszs[ BLIS_NUM_BLKSZS ];
blksz_t thresh[ BLIS_NUM_THRESH ];
// Set default kernel blocksizes and functions.
bli_cntx_init_firestorm_ref( cntx );
// -------------------------------------------------------------------------
// Update the context with optimized native gemm micro-kernels and
// their storage preferences.
bli_cntx_set_l3_nat_ukrs
(
2,
BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv8a_asm_8x12, FALSE,
BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv8a_asm_6x8, FALSE,
cntx
);
// Update the context with optimized packm kernels.
bli_cntx_set_packm_kers
(
4,
BLIS_PACKM_8XK_KER, BLIS_FLOAT, bli_spackm_armv8a_int_8xk,
BLIS_PACKM_12XK_KER, BLIS_FLOAT, bli_spackm_armv8a_int_12xk,
BLIS_PACKM_6XK_KER, BLIS_DOUBLE, bli_dpackm_armv8a_int_6xk,
BLIS_PACKM_8XK_KER, BLIS_DOUBLE, bli_dpackm_armv8a_int_8xk,
cntx
);
// Initialize level-3 blocksize objects with architecture-specific values.
// s d c z
bli_blksz_init_easy( &blkszs[ BLIS_MR ], 8, 6, -1, -1 );
bli_blksz_init_easy( &blkszs[ BLIS_NR ], 12, 8, -1, -1 );
bli_blksz_init_easy( &blkszs[ BLIS_MC ], 120, 240, -1, -1 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], 640, 2048, -1, -1 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], 3072, 8192, -1, -1 );
// Update the context with the current architecture's register and cache
// blocksizes (and multiples) for native execution.
bli_cntx_set_blkszs
(
BLIS_NAT, 5,
BLIS_NC, &blkszs[ BLIS_NC ], BLIS_NR,
BLIS_KC, &blkszs[ BLIS_KC ], BLIS_KR,
BLIS_MC, &blkszs[ BLIS_MC ], BLIS_MR,
BLIS_NR, &blkszs[ BLIS_NR ], BLIS_NR,
BLIS_MR, &blkszs[ BLIS_MR ], BLIS_MR,
cntx
);
// -------------------------------------------------------------------------
// Initialize sup thresholds with architecture-appropriate values.
// s d c z
bli_blksz_init_easy( &thresh[ BLIS_MT ], -1, 99, -1, -1 );
bli_blksz_init_easy( &thresh[ BLIS_NT ], -1, 99, -1, -1 );
bli_blksz_init_easy( &thresh[ BLIS_KT ], -1, 99, -1, -1 );
// Initialize the context with the sup thresholds.
bli_cntx_set_l3_sup_thresh
(
3,
BLIS_MT, &thresh[ BLIS_MT ],
BLIS_NT, &thresh[ BLIS_NT ],
BLIS_KT, &thresh[ BLIS_KT ],
cntx
);
// Update the context with optimized small/unpacked gemm kernels.
bli_cntx_set_l3_sup_kers
(
8,
BLIS_RRR, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8m, TRUE,
BLIS_RRC, BLIS_DOUBLE, bli_dgemmsup_rd_armv8a_asm_6x8m, TRUE,
BLIS_RCR, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8m, TRUE,
BLIS_RCC, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8n, TRUE,
BLIS_CRR, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8m, TRUE,
BLIS_CRC, BLIS_DOUBLE, bli_dgemmsup_rd_armv8a_asm_6x8n, TRUE,
BLIS_CCR, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8n, TRUE,
BLIS_CCC, BLIS_DOUBLE, bli_dgemmsup_rv_armv8a_asm_6x8n, TRUE,
cntx
);
// Initialize level-3 sup blocksize objects with architecture-specific
// values.
// s d c z
bli_blksz_init ( &blkszs[ BLIS_MR ], -1, 6, -1, -1,
-1, 8, -1, -1 );
bli_blksz_init_easy( &blkszs[ BLIS_NR ], -1, 8, -1, -1 );
bli_blksz_init_easy( &blkszs[ BLIS_MC ], -1, 240, -1, -1 );
bli_blksz_init_easy( &blkszs[ BLIS_KC ], -1, 1024, -1, -1 );
bli_blksz_init_easy( &blkszs[ BLIS_NC ], -1, 3072, -1, -1 );
// Update the context with the current architecture's register and cache
// blocksizes for small/unpacked level-3 problems.
bli_cntx_set_l3_sup_blkszs
(
5,
BLIS_NC, &blkszs[ BLIS_NC ],
BLIS_KC, &blkszs[ BLIS_KC ],
BLIS_MC, &blkszs[ BLIS_MC ],
BLIS_NR, &blkszs[ BLIS_NR ],
BLIS_MR, &blkszs[ BLIS_MR ],
cntx
);
}

View File

@@ -0,0 +1,76 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//#ifndef BLIS_FAMILY_H
//#define BLIS_FAMILY_H
// -- MEMORY ALLOCATION --------------------------------------------------------
#define BLIS_SIMD_ALIGN_SIZE 16
#if 0
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
#define BLIS_SGEMM_UKERNEL bli_sgemm_opt_8x12
#define BLIS_DEFAULT_MR_S 8
#define BLIS_DEFAULT_NR_S 12
#define BLIS_DEFAULT_MC_S 120 //1536 //336 //416 // 1280 //160 // 160 // 160 //2048 //336
#define BLIS_DEFAULT_KC_S 640 //1536 //336 //704 //1280 //672 //528 // 856 //2048 //528
#define BLIS_DEFAULT_NC_S 3072
#define BLIS_DGEMM_UKERNEL bli_dgemm_opt_6x8
#define BLIS_DEFAULT_MR_D 6
#define BLIS_DEFAULT_NR_D 8
#define BLIS_DEFAULT_MC_D 120 //1536 //160 //80 //176
#define BLIS_DEFAULT_KC_D 240 //1536 //304 //336 //368
#define BLIS_DEFAULT_NC_D 3072
#define BLIS_DEFAULT_MR_C 8
#define BLIS_DEFAULT_NR_C 4
#define BLIS_DEFAULT_MC_C 64
#define BLIS_DEFAULT_KC_C 128
#define BLIS_DEFAULT_NC_C 4096
#define BLIS_DEFAULT_MR_Z 8
#define BLIS_DEFAULT_NR_Z 4
#define BLIS_DEFAULT_MC_Z 64
#define BLIS_DEFAULT_KC_Z 128
#define BLIS_DEFAULT_NC_Z 4096
#endif
//#endif

View File

@@ -0,0 +1,82 @@
#
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2014, The University of Texas at Austin
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name(s) of the copyright holder(s) nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# Declare the name of the current configuration and add it to the
# running list of configurations included by common.mk.
THIS_CONFIG := firestorm
#CONFIGS_INCL += $(THIS_CONFIG)
#
# --- Determine the C compiler and related flags ---
#
# NOTE: The build system will append these variables with various
# general-purpose/configuration-agnostic flags in common.mk. You
# may specify additional flags here as needed.
CPPROCFLAGS := -D_GNU_SOURCE
CMISCFLAGS :=
CPICFLAGS :=
CWARNFLAGS :=
ifneq ($(DEBUG_TYPE),off)
CDBGFLAGS := -g
endif
ifeq ($(DEBUG_TYPE),noopt)
COPTFLAGS := -O0
else
COPTFLAGS := -O2 -march=armv8-a
endif
# Flags specific to optimized kernels.
CKOPTFLAGS := $(COPTFLAGS) -O3 -ftree-vectorize
CKVECFLAGS := -march=armv8-a
# Flags specific to reference kernels.
CROPTFLAGS := $(CKOPTFLAGS)
ifeq ($(CC_VENDOR),gcc)
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
else
ifeq ($(CC_VENDOR),clang)
CRVECFLAGS := $(CKVECFLAGS) -funsafe-math-optimizations -ffp-contract=fast
else
CRVECFLAGS := $(CKVECFLAGS)
endif
endif
# Store all of the variables here to new variables containing the
# configuration name.
$(eval $(call store-make-defs,$(THIS_CONFIG)))

View File

@@ -34,6 +34,7 @@ bulldozer: bulldozer
# ARM architectures.
armsve: armsve/armsve
a64fx: a64fx/armsve
firestorm: firestorm/armv8a
thunderx2: thunderx2/armv8a
cortexa57: cortexa57/armv8a
cortexa53: cortexa53/armv8a

View File

@@ -179,6 +179,9 @@ void bli_arch_set_id( void )
#ifdef BLIS_FAMILY_A64FX
id = BLIS_ARCH_A64FX;
#endif
#ifdef BLIS_FAMILY_FIRESTORM
id = BLIS_ARCH_FIRESTORM;
#endif
#ifdef BLIS_FAMILY_THUNDERX2
id = BLIS_ARCH_THUNDERX2;
#endif

View File

@@ -154,6 +154,11 @@ void bli_gks_init( void )
bli_cntx_init_a64fx_ref,
bli_cntx_init_a64fx_ind );
#endif
#ifdef BLIS_CONFIG_FIRESTORM
bli_gks_register_cntx( BLIS_ARCH_FIRESTORM, bli_cntx_init_firestorm,
bli_cntx_init_firestorm_ref,
bli_cntx_init_firestorm_ind );
#endif
#ifdef BLIS_CONFIG_CORTEXA15
bli_gks_register_cntx( BLIS_ARCH_CORTEXA15, bli_cntx_init_cortexa15,
bli_cntx_init_cortexa15_ref,

View File

@@ -89,6 +89,9 @@ CNTX_INIT_PROTS( armsve )
#ifdef BLIS_CONFIG_A64FX
CNTX_INIT_PROTS( a64fx )
#endif
#ifdef BLIS_CONFIG_FIRESTORM
CNTX_INIT_PROTS( firestorm )
#endif
#ifdef BLIS_CONFIG_THUNDERX2
CNTX_INIT_PROTS( thunderx2 )
#endif
@@ -195,6 +198,9 @@ CNTX_INIT_PROTS( generic )
#ifdef BLIS_FAMILY_A64FX
#include "bli_family_a64fx.h"
#endif
#ifdef BLIS_FAMILY_FIRESTORM
#include "bli_family_firestorm.h"
#endif
#ifdef BLIS_FAMILY_THUNDERX2
#include "bli_family_thunderx2.h"
#endif

View File

@@ -1010,6 +1010,7 @@ typedef enum
// ARM
BLIS_ARCH_ARMSVE,
BLIS_ARCH_A64FX,
BLIS_ARCH_FIRESTORM,
BLIS_ARCH_THUNDERX2,
BLIS_ARCH_CORTEXA57,
BLIS_ARCH_CORTEXA53,