Merge pull request #173 from devinamatthews/dev

Fix Cortex-A9 and Cortex-A15 configs.
This commit is contained in:
Devin Matthews
2018-03-14 13:27:44 -05:00
committed by GitHub
6 changed files with 14 additions and 12 deletions

View File

@@ -430,9 +430,9 @@ endef
# first argument: a kernel set (name) being targeted (e.g. haswell).
# second argument: the configuration whose CFLAGS we should use in compilation.
# third argument: the suffix of the kernel source files to match, given
#   without the leading dot (e.g. c, s, or S).
#$(BASE_OBJ_KERNELS_PATH)/$(1)/%.o: $(KERNELS_PATH)/$(1)/%.$(3) $(MK_HEADER_FILES) $(MAKE_DEFS_MK_PATHS)
#$(BASE_OBJ_KERNELS_PATH)/$(1)/%.o: $(KERNELS_PATH)/$(1)/%.c $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS)
define make-kernels-rule
$(BASE_OBJ_KERNELS_PATH)/$(1)/%.o: $(KERNELS_PATH)/$(1)/%.c $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS)
$(BASE_OBJ_KERNELS_PATH)/$(1)/%.o: $(KERNELS_PATH)/$(1)/%.$(3) $(BLIS_H_FLAT) $(MAKE_DEFS_MK_PATHS)
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
$(CC) $(call get-kernel-cflags-for,$(2)) -c $$< -o $$@
else
@@ -468,7 +468,9 @@ $(foreach conf, $(CONFIG_LIST), $(eval $(call make-refkern-rule,$(conf))))
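# Instantiate the build rule for optimized kernels for each of the kernel
# sets in KERNEL_LIST, using the CFLAGS designated for the sub-configuration
# specified by the KCONFIG_MAP. Where a third argument is given, it selects
# the kernel source file suffix (c, s, or S) that the instantiated rule
# matches.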
$(foreach kset, $(KERNEL_LIST), $(eval $(call make-kernels-rule,$(kset),$(call get-config-for-kset,$(kset)))))
$(foreach kset, $(KERNEL_LIST), $(eval $(call make-kernels-rule,$(kset),$(call get-config-for-kset,$(kset)),c)))
$(foreach kset, $(KERNEL_LIST), $(eval $(call make-kernels-rule,$(kset),$(call get-config-for-kset,$(kset)),s)))
$(foreach kset, $(KERNEL_LIST), $(eval $(call make-kernels-rule,$(kset),$(call get-config-for-kset,$(kset)),S)))
# FGVZ: for later, to compile multiple kernel source suffixes.
#$(foreach suf, $(KERNEL_SUFS), \

View File

@@ -48,8 +48,8 @@ void bli_cntx_init_cortexa15( cntx_t* cntx )
bli_cntx_set_l3_nat_ukrs
(
2,
BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_arm_int_4x4, FALSE,
BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_arm_int_4x4, FALSE,
BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv7a_int_4x4, FALSE,
BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv7a_int_4x4, FALSE,
cntx
);

View File

@@ -44,14 +44,14 @@
#if 0
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
#define BLIS_SGEMM_UKERNEL bli_sgemm_opt_4x4
#define BLIS_SGEMM_UKERNEL bli_sgemm_armv7a_int_4x4
#define BLIS_DEFAULT_MR_S 4
#define BLIS_DEFAULT_NR_S 4
#define BLIS_DEFAULT_MC_S 336
#define BLIS_DEFAULT_KC_S 528
#define BLIS_DEFAULT_NC_S 4096
#define BLIS_DGEMM_UKERNEL bli_dgemm_opt_4x4
#define BLIS_DGEMM_UKERNEL bli_dgemm_armv7a_int_4x4
#define BLIS_DEFAULT_MR_D 4
#define BLIS_DEFAULT_NR_D 4
#define BLIS_DEFAULT_MC_D 176

View File

@@ -48,8 +48,8 @@ void bli_cntx_init_cortexa9( cntx_t* cntx )
bli_cntx_set_l3_nat_ukrs
(
2,
BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_arm_int_4x4, FALSE,
BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_arm_int_4x4, FALSE,
BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_armv7a_int_4x4, FALSE,
BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_armv7a_int_4x4, FALSE,
cntx
);

View File

@@ -44,14 +44,14 @@
#if 0
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
#define BLIS_SGEMM_UKERNEL bli_sgemm_opt_4x4
#define BLIS_SGEMM_UKERNEL bli_sgemm_armv7a_int_4x4
#define BLIS_DEFAULT_MR_S 4
#define BLIS_DEFAULT_NR_S 4
#define BLIS_DEFAULT_MC_S 432
#define BLIS_DEFAULT_KC_S 352
#define BLIS_DEFAULT_NC_S 4096
#define BLIS_DGEMM_UKERNEL bli_dgemm_opt_4x4
#define BLIS_DGEMM_UKERNEL bli_dgemm_armv7a_int_4x4
#define BLIS_DEFAULT_MR_D 4
#define BLIS_DEFAULT_NR_D 4
#define BLIS_DEFAULT_MC_D 176

View File

@@ -67,7 +67,7 @@ endif
CKOPTFLAGS := $(COPTFLAGS)
ifneq ($(CC_VENDOR),gcc)
ifeq ($(CC_VENDOR),gcc)
CVECFLAGS := -march=armv7-a
else
$(error gcc is required for this configuration.)