mirror of
https://github.com/amd/blis.git
synced 2026-04-20 07:38:53 +00:00
Added build system and continued reorganization.
Details: - Added/renamed packm, unpackm kernels. - Added machine value routines. - Added param_map facility. - Renamed AUTHORS to CREDITS. - Added Makefile; continued to expand upon existing configure script. - #define fuse_fac macros in operation headers if not defined already (by the user in bl2_kernels.h).
This commit is contained in:
28
.gitignore
vendored
28
.gitignore
vendored
@@ -1,30 +1,34 @@
|
||||
# generic files to ignore
|
||||
# -- generic files to ignore --
|
||||
|
||||
*~ # emacs backup files
|
||||
*.swp # vim swap file
|
||||
*.out # anything
|
||||
# emacs backup files
|
||||
*~
|
||||
# vim backup files
|
||||
*.swp
|
||||
|
||||
# compiler-related
|
||||
# -- compiler-related --
|
||||
|
||||
*.o # object files
|
||||
*.a # static library archives
|
||||
*.x # test executables
|
||||
# object files
|
||||
*.o
|
||||
# static library archives
|
||||
*.a
|
||||
# test executables
|
||||
*.x
|
||||
|
||||
# build system files
|
||||
# -- build system files --
|
||||
|
||||
config.mk
|
||||
|
||||
# directories with build products
|
||||
# -- directories with build products --
|
||||
|
||||
lib
|
||||
obj
|
||||
|
||||
# makefile fragments
|
||||
# -- makefile fragments --
|
||||
|
||||
.fragment.mk
|
||||
fragment.mk
|
||||
|
||||
# misc.
|
||||
# -- misc. --
|
||||
|
||||
output*.m
|
||||
|
||||
|
||||
@@ -18,8 +18,8 @@ conceived, designed, and developed, including:
|
||||
Rhys Ulerich (The University of Texas at Austin)
|
||||
Robert van de Geijn (The University of Texas at Austin)
|
||||
|
||||
Thanks go to the following individuals for helping port very early
|
||||
versions of BLIS to new architectures as proofs-of-concept:
|
||||
Thanks go to the following individuals for porting very early versions
|
||||
of BLIS to new architectures as proofs-of-concept:
|
||||
|
||||
Francisco Igual (The University of Texas at Austin)
|
||||
Tyler Smith (The University of Texas at Austin)
|
||||
174
Makefile
174
Makefile
@@ -77,8 +77,12 @@ LIB_DIR := lib
|
||||
|
||||
NOOPT_DIR := noopt
|
||||
|
||||
# Construct a path to the framework source tree.
|
||||
NOOPT_TEXT := "(NOTE: optimizations disabled)"
|
||||
|
||||
# Construct some paths.
|
||||
FRAME_PATH := ./$(FRAME_DIR)
|
||||
OBJ_PATH := ./$(OBJ_DIR)
|
||||
LIB_PATH := ./$(LIB_DIR)
|
||||
|
||||
|
||||
|
||||
@@ -106,9 +110,9 @@ endif
|
||||
CONFIG_PATH := ./$(CONFIG_DIR)/$(CONFIG_NAME)
|
||||
|
||||
# Construct base paths for the object file tree.
|
||||
BASE_OBJ_DIR := ./$(OBJ_DIR)/$(CONFIG_NAME)
|
||||
BASE_OBJ_CONFIG_PATH := $(BASE_OBJ_DIR)/$(CONFIG_DIR)
|
||||
BASE_OBJ_FRAME_PATH := $(BASE_OBJ_DIR)/$(FRAME_DIR)
|
||||
BASE_OBJ_PATH := ./$(OBJ_DIR)/$(CONFIG_NAME)
|
||||
BASE_OBJ_CONFIG_PATH := $(BASE_OBJ_PATH)/$(CONFIG_DIR)
|
||||
BASE_OBJ_FRAME_PATH := $(BASE_OBJ_PATH)/$(FRAME_DIR)
|
||||
|
||||
# Construct base path for the library.
|
||||
BASE_LIB_PATH := ./$(LIB_DIR)/$(CONFIG_NAME)
|
||||
@@ -144,7 +148,7 @@ endif
|
||||
# Construct the architecture-version string, which will be used to name the
|
||||
# library upon installation.
|
||||
VERSION := $(shell cat version)
|
||||
CONF_VERS := $(CONFIG_NAME)-$(VERSION)
|
||||
VERS_CONF := $(VERSION)-$(CONFIG_NAME)
|
||||
|
||||
# --- Library names ---
|
||||
|
||||
@@ -172,33 +176,33 @@ MK_CONFIG_OBJS :=
|
||||
MK_CONFIG_NOOPT_OBJS :=
|
||||
|
||||
# Append the base library path to the library name.
|
||||
MK_ALL_BLIS_LIB := $(BASE_LIB_DIR)/$(BLIS_LIB_NAME)
|
||||
MK_ALL_BLIS_LIB := $(BASE_LIB_PATH)/$(BLIS_LIB_NAME)
|
||||
|
||||
# --- Define install target names for static libraries ---
|
||||
|
||||
MK_BLIS_LIB := $(MK_ALL_BLIS_LIB)
|
||||
MK_BLIS_LIB_INST := $(patsubst $(BASE_LIB_DIR)/%.a, \
|
||||
MK_BLIS_LIB_INST := $(patsubst $(BASE_LIB_PATH)/%.a, \
|
||||
$(INSTALL_PREFIX)/lib/%.a, \
|
||||
$(MK_BLIS_LIB))
|
||||
MK_BLIS_LIB_INST_W_CONF_VERS := $(patsubst $(BASE_LIB_DIR)/%.a, \
|
||||
$(INSTALL_PREFIX)/lib/%-$(CONF_VERS).a, \
|
||||
MK_BLIS_LIB_INST_W_VERS_CONF := $(patsubst $(BASE_LIB_PATH)/%.a, \
|
||||
$(INSTALL_PREFIX)/lib/%-$(VERS_CONF).a, \
|
||||
$(MK_BLIS_LIB))
|
||||
|
||||
# --- Determine which libraries to build ---
|
||||
|
||||
MK_LIBS :=
|
||||
MK_LIBS_INST :=
|
||||
MK_LIBS_INST_W_CONF_VERS :=
|
||||
MK_LIBS_INST_W_VERS_CONF :=
|
||||
|
||||
ifeq ($(BLIS_ENABLE_STATIC_BUILD),yes)
|
||||
MK_LIBS += $(MK_BLIS_LIB)
|
||||
MK_LIBS_INST += $(MK_BLIS_LIB_INST)
|
||||
MK_LIBS_INST_W_CONF_VERS += $(MK_BLIS_LIB_INST_W_CONF_VERS)
|
||||
MK_LIBS_INST_W_VERS_CONF += $(MK_BLIS_LIB_INST_W_VERS_CONF)
|
||||
endif
|
||||
|
||||
# Set the include directory names
|
||||
MK_INCL_DIR_INST := $(INSTALL_PREFIX)/include
|
||||
MK_INCL_DIR_INST_W_CONF_VERS := $(INSTALL_PREFIX)/include-$(CONF_VERS)
|
||||
MK_INCL_DIR_INST_W_VERS_CONF := $(INSTALL_PREFIX)/include-$(VERS_CONF)
|
||||
|
||||
|
||||
|
||||
@@ -212,16 +216,21 @@ MK_INCL_DIR_INST_W_CONF_VERS := $(INSTALL_PREFIX)/include-$(CONF_VERS)
|
||||
# makefile fragments reside.
|
||||
FRAGMENT_DIR_PATHS :=
|
||||
|
||||
# The only fragment sub-directories that we build from are the config and
|
||||
# frame directories.
|
||||
FRAGMENT_SUB_DIRS := $(CONFIG_PATH) $(FRAME_PATH)
|
||||
|
||||
# This variable is used by the include statements as they recursively include
|
||||
# one another. We initialize it to the current directory.
|
||||
# one another. For the framework source tree, we initialize it to the current
|
||||
# directory since '.' is its parent.
|
||||
PARENT_PATH := .
|
||||
|
||||
# Recursively include all the makefile fragments.
|
||||
-include $(addsuffix /$(FRAGMENT_MK), $(FRAGMENT_SUB_DIRS))
|
||||
# Recursively include all the makefile fragments in the framework itself.
|
||||
-include $(addsuffix /$(FRAGMENT_MK), $(FRAME_PATH))
|
||||
|
||||
# Now set PARENT_PATH to ./config in preparation to include the fragments in
|
||||
# the configuration sub-directory.
|
||||
PARENT_PATH := ./$(CONFIG_DIR)
|
||||
|
||||
# Recursively include all the makefile fragments in the configuration
|
||||
# sub-directory.
|
||||
-include $(addsuffix /$(FRAGMENT_MK), $(CONFIG_PATH))
|
||||
|
||||
# Create a list of the makefile fragments.
|
||||
MAKEFILE_FRAGMENTS := $(addsuffix /$(FRAGMENT_MK), $(FRAGMENT_DIR_PATHS))
|
||||
@@ -317,17 +326,17 @@ check: check-make-defs check-fragments check-config
|
||||
|
||||
check-config:
|
||||
ifeq ($(CONFIG_MK_PRESENT),no)
|
||||
$(error Cannot proceed: config.mk not detected! Run configure first)
|
||||
$(error Cannot proceed: config.mk not detected! Run configure first)
|
||||
endif
|
||||
|
||||
check-fragments: check-config
|
||||
ifeq ($(MAKEFILE_FRAGMENTS_PRESENT),no)
|
||||
$(error Cannot proceed: makefile fragments not detected! Run configure first)
|
||||
$(error Cannot proceed: makefile fragments not detected! Run configure first)
|
||||
endif
|
||||
|
||||
check-make-defs: check-fragments
|
||||
ifeq ($(MAKE_DEFS_MK_PRESENT),no)
|
||||
$(error Cannot proceed: make_defs.mk not detected! Invalid configuration)
|
||||
$(error Cannot proceed: make_defs.mk not detected! Invalid configuration)
|
||||
endif
|
||||
|
||||
|
||||
@@ -337,27 +346,32 @@ endif
|
||||
|
||||
# --- General source code / object code rules ---
|
||||
|
||||
$(BASE_OBJ_FRAME_DIR)/%.o: $(FRAME_PATH)/%.c $(CONFIG_MK_PATH)
|
||||
#$(BASE_OBJ_FRAME_PATH)/%.o: $(FRAME_PATH)/%.c $(CONFIG_MK_PATH)
|
||||
#ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
|
||||
# $(CC) $(CFLAGS) -c $< -o $@
|
||||
#else
|
||||
# @echo "Compiling $<"
|
||||
# @$(CC) $(CFLAGS) -c $< -o $@
|
||||
#endif
|
||||
|
||||
$(BASE_OBJ_FRAME_PATH)/%.o: $(FRAME_PATH)/%.c $(CONFIG_MK_PATH)
|
||||
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
$(CC) $(if $(findstring $(NOOPT_DIR),$@),$(CFLAGS_NOOPT),$(CFLAGS)) -c $< -o $@
|
||||
else
|
||||
ifeq ($(findstring $(NOOPT_DIR),$@),)
|
||||
@echo "Compiling $<"
|
||||
@$(CC) $(CFLAGS) -c $< -o $@
|
||||
else
|
||||
@echo "Compiling (no opt) $<"
|
||||
@$(CC) $(CFLAGS_NOOPT) -c $< -o $@
|
||||
endif
|
||||
@echo "Compiling $<" $(if $(findstring $(NOOPT_DIR),$@),$(NOOPT_TEXT),)
|
||||
@$(CC) $(if $(findstring $(NOOPT_DIR),$@),$(CFLAGS_NOOPT),$(CFLAGS)) -c $< -o $@
|
||||
endif
|
||||
|
||||
$(BASE_OBJ_CONFIG_DIR)/%.o: $(CONFIG_PATH)/%.c $(CONFIG_MK_PATH)
|
||||
$(BASE_OBJ_CONFIG_PATH)/%.o: $(CONFIG_PATH)/%.c $(CONFIG_MK_PATH)
|
||||
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
$(CC) $(if $(findstring $(NOOPT_DIR),$@),$(CFLAGS_NOOPT),$(CFLAGS)) -c $< -o $@
|
||||
else
|
||||
@echo "Compiling $<"
|
||||
@$(CC) $(CFLAGS) -c $< -o $@
|
||||
@echo "Compiling $<" $(if $(findstring $(NOOPT_DIR),$@),$(NOOPT_TEXT),)
|
||||
@$(CC) $(if $(findstring $(NOOPT_DIR),$@),$(CFLAGS_NOOPT),$(CFLAGS)) -c $< -o $@
|
||||
endif
|
||||
|
||||
#ifeq ($(findstring $(NOOPT_DIR),$@),)
|
||||
|
||||
|
||||
# --- Static library archiver rules ---
|
||||
|
||||
@@ -374,28 +388,28 @@ endif
|
||||
|
||||
# --- Install rules ---
|
||||
|
||||
install-libs: check $(MK_LIBS_INST_W_CONF_VERS)
|
||||
install-libs: check $(MK_LIBS_INST_W_VERS_CONF)
|
||||
|
||||
install-headers: check $(MK_INCL_DIR_INST_W_CONF_VERS)
|
||||
install-headers: check $(MK_INCL_DIR_INST_W_VERS_CONF)
|
||||
|
||||
$(MK_INCL_DIR_INST_W_CONF_VERS): $(MK_HEADER_FILES)
|
||||
$(MK_INCL_DIR_INST_W_VERS_CONF): $(MK_HEADER_FILES)
|
||||
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
|
||||
$(INSTALL) -m 0755 -d $(@)
|
||||
$(INSTALL) -m 0644 $(MK_HEADER_FILES) $(@)
|
||||
$(INSTALL) -m 0755 -d $(@)
|
||||
$(INSTALL) -m 0644 $(MK_HEADER_FILES) $(@)
|
||||
else
|
||||
@$(INSTALL) -m 0755 -d $(@)
|
||||
@echo "Installing C header files into $(@)"
|
||||
@$(INSTALL) -m 0644 $(MK_HEADER_FILES) $(@)
|
||||
@$(INSTALL) -m 0755 -d $(@)
|
||||
@echo "Installing C header files into $(@)"
|
||||
@$(INSTALL) -m 0644 $(MK_HEADER_FILES) $(@)
|
||||
endif
|
||||
|
||||
$(INSTALL_PREFIX)/lib/%-$(CONF_VERS).a: $(BASE_LIB_DIR)/%.a
|
||||
$(INSTALL_PREFIX)/lib/%-$(VERS_CONF).a: $(BASE_LIB_PATH)/%.a
|
||||
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
|
||||
$(INSTALL) -m 0755 -d $(@D)
|
||||
$(INSTALL) -m 0644 $< $@
|
||||
$(INSTALL) -m 0755 -d $(@D)
|
||||
$(INSTALL) -m 0644 $< $@
|
||||
else
|
||||
@echo "Installing $(@F) into $(INSTALL_PREFIX)/lib/"
|
||||
@$(INSTALL) -m 0755 -d $(@D)
|
||||
@$(INSTALL) -m 0644 $< $@
|
||||
@echo "Installing $(@F) into $(INSTALL_PREFIX)/lib/"
|
||||
@$(INSTALL) -m 0755 -d $(@D)
|
||||
@$(INSTALL) -m 0644 $< $@
|
||||
endif
|
||||
|
||||
|
||||
@@ -405,40 +419,62 @@ install-lib-symlinks: check $(MK_LIBS_INST)
|
||||
|
||||
install-header-symlinks: check $(MK_INCL_DIR_INST)
|
||||
|
||||
$(MK_INCL_DIR_INST): $(MK_INCL_DIR_INST_W_CONF_VERS)
|
||||
$(MK_INCL_DIR_INST): $(MK_INCL_DIR_INST_W_VERS_CONF)
|
||||
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
|
||||
$(SYMLINK) $(<F) $(@F)
|
||||
$(MV) $(@F) $(INSTALL_PREFIX)
|
||||
$(SYMLINK) $(<F) $(@F)
|
||||
$(MV) $(@F) $(INSTALL_PREFIX)
|
||||
else
|
||||
@echo "Installing symlink $(@F) into $(INSTALL_PREFIX)/"
|
||||
@$(SYMLINK) $(<F) $(@F)
|
||||
@$(MV) $(@F) $(INSTALL_PREFIX)
|
||||
@echo "Installing symlink $(@F) into $(INSTALL_PREFIX)/"
|
||||
@$(SYMLINK) $(<F) $(@F)
|
||||
@$(MV) $(@F) $(INSTALL_PREFIX)
|
||||
endif
|
||||
|
||||
$(INSTALL_PREFIX)/lib/%.a: $(INSTALL_PREFIX)/lib/%-$(CONF_VERS).a
|
||||
$(INSTALL_PREFIX)/lib/%.a: $(INSTALL_PREFIX)/lib/%-$(VERS_CONF).a
|
||||
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
|
||||
$(SYMLINK) $(<F) $(@F)
|
||||
$(MV) $(@F) $(INSTALL_PREFIX)/lib/
|
||||
$(SYMLINK) $(<F) $(@F)
|
||||
$(MV) $(@F) $(INSTALL_PREFIX)/lib/
|
||||
else
|
||||
@echo "Installing symlink $(@F) into $(INSTALL_PREFIX)/lib/"
|
||||
@$(SYMLINK) $(<F) $(@F)
|
||||
@$(MV) $(@F) $(INSTALL_PREFIX)/lib/
|
||||
@echo "Installing symlink $(@F) into $(INSTALL_PREFIX)/lib/"
|
||||
@$(SYMLINK) $(<F) $(@F)
|
||||
@$(MV) $(@F) $(INSTALL_PREFIX)/lib/
|
||||
endif
|
||||
|
||||
|
||||
# --- Clean rules ---
|
||||
|
||||
cleanmost: check
|
||||
- $(FIND) $(BASE_OBJ_DIR) -name "*.o" | $(XARGS) $(RM_F)
|
||||
- $(FIND) $(BASE_LIB_DIR) -name "*.a" | $(XARGS) $(RM_F)
|
||||
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
|
||||
- $(FIND) $(BASE_OBJ_PATH) -name "*.o" | $(XARGS) $(RM_F)
|
||||
- $(FIND) $(BASE_LIB_PATH) -name "*.a" | $(XARGS) $(RM_F)
|
||||
else
|
||||
@echo "Removing .o files from $(BASE_OBJ_PATH)."
|
||||
@- $(FIND) $(BASE_OBJ_PATH) -name "*.o" | $(XARGS) $(RM_F)
|
||||
@echo "Removing .a files from $(BASE_LIB_PATH)."
|
||||
@- $(FIND) $(BASE_LIB_PATH) -name "*.a" | $(XARGS) $(RM_F)
|
||||
endif
|
||||
|
||||
distclean: check cleanmost cleanmk
|
||||
distclean: check cleanmk cleanmost
|
||||
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
|
||||
- $(RM_F) $(CONFIG_MK_PATH)
|
||||
- $(RM_RF) $(OBJ_DIR)
|
||||
- $(RM_RF) $(LIB_DIR)
|
||||
- $(RM_RF) $(OBJ_PATH)
|
||||
- $(RM_RF) $(LIB_PATH)
|
||||
else
|
||||
@echo "Removing $(CONFIG_MK_PATH)."
|
||||
@- $(RM_F) $(CONFIG_MK_PATH)
|
||||
@echo "Removing $(OBJ_PATH)."
|
||||
@- $(RM_RF) $(OBJ_PATH)
|
||||
@echo "Removing $(LIB_PATH)."
|
||||
@- $(RM_RF) $(LIB_PATH)
|
||||
endif
|
||||
|
||||
cleanmk: check
|
||||
- $(FIND) $(CONFIG_PATH) -name "$(FRAGMENT_MK)" | $(XARGS) $(RM_F)
|
||||
- $(FIND) $(FRAME_PATH) -name "$(FRAGMENT_MK)" | $(XARGS) $(RM_F)
|
||||
|
||||
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
|
||||
- $(FIND) $(CONFIG_PATH) -name "$(FRAGMENT_MK)" | $(XARGS) $(RM_F)
|
||||
- $(FIND) $(FRAME_PATH) -name "$(FRAGMENT_MK)" | $(XARGS) $(RM_F)
|
||||
else
|
||||
@echo "Removing makefile fragments from $(CONFIG_PATH)."
|
||||
@- $(FIND) $(CONFIG_PATH) -name "$(FRAGMENT_MK)" | $(XARGS) $(RM_F)
|
||||
@echo "Removing makefile fragments from $(FRAME_PATH)."
|
||||
@- $(FIND) $(FRAME_PATH) -name "$(FRAGMENT_MK)" | $(XARGS) $(RM_F)
|
||||
endif
|
||||
|
||||
|
||||
@@ -53,6 +53,24 @@
|
||||
|
||||
|
||||
|
||||
// -- LEVEL-1M KERNEL DEFINITIONS ----------------------------------------------
|
||||
|
||||
// -- packm --
|
||||
|
||||
#define PACKM_2XK_KERNEL packm_ref_2xk
|
||||
#define PACKM_4XK_KERNEL packm_ref_4xk
|
||||
#define PACKM_6XK_KERNEL packm_ref_6xk
|
||||
#define PACKM_8XK_KERNEL packm_ref_8xk
|
||||
|
||||
// -- unpackm --
|
||||
|
||||
#define UNPACKM_2XK_KERNEL unpackm_ref_2xk
|
||||
#define UNPACKM_4XK_KERNEL unpackm_ref_4xk
|
||||
#define UNPACKM_6XK_KERNEL unpackm_ref_6xk
|
||||
#define UNPACKM_8XK_KERNEL unpackm_ref_8xk
|
||||
|
||||
|
||||
|
||||
// -- LEVEL-1F KERNEL DEFINITIONS ----------------------------------------------
|
||||
|
||||
// -- axpy2v --
|
||||
|
||||
@@ -72,10 +72,10 @@ INSTALL := install -c
|
||||
|
||||
# --- Determine the C compiler and related flags ---
|
||||
CC := gcc
|
||||
CPPROCFLAGS :=
|
||||
CPPROCFLAGS :=
|
||||
CMISCFLAGS := -std=c99 # -fopenmp -pg
|
||||
CDBGFLAGS := -g
|
||||
CWARNFLAGS := -Wall # -Wno-comment
|
||||
CWARNFLAGS := -Wall
|
||||
COPTFLAGS := -O2 -malign-double
|
||||
CVECFLAGS := -msse3 -march=native # -mfpmath=sse
|
||||
|
||||
|
||||
49
configure
vendored
49
configure
vendored
@@ -36,7 +36,9 @@ print_usage()
|
||||
{
|
||||
# Echo usage info.
|
||||
echo " "
|
||||
echo " ${script_name}"
|
||||
echo " ${script_name} (BLIS ${version})"
|
||||
#echo " "
|
||||
#echo " BLIS ${version}"
|
||||
echo " "
|
||||
echo " Field G. Van Zee"
|
||||
echo " "
|
||||
@@ -58,7 +60,9 @@ print_usage()
|
||||
echo " "
|
||||
echo " -p PREFIX install prefix"
|
||||
echo " The path to which make will install buid products."
|
||||
echo " If not given, PREFIX defaults to \$(HOME)/blis."
|
||||
echo " If not given, PREFIX defaults to \$(HOME)/blis. If"
|
||||
echo " the path refers to a directory that does not exist,"
|
||||
echo " it will be created."
|
||||
echo " "
|
||||
echo " -q quiet"
|
||||
echo " Suppress informational output. By default, configure"
|
||||
@@ -75,7 +79,13 @@ main()
|
||||
script_name=${0##*/}
|
||||
|
||||
# Option variables.
|
||||
quiet_flag=""
|
||||
quiet_flag=''
|
||||
|
||||
# The file in which the version string is kept.
|
||||
version_file='version'
|
||||
|
||||
# The contents of version_file.
|
||||
version=''
|
||||
|
||||
# The name of the build directory. This is where the template
|
||||
# config.mk.in file lives.
|
||||
@@ -104,12 +114,19 @@ main()
|
||||
# The name of the directory in which object files will be kept.
|
||||
obj_dir='obj'
|
||||
|
||||
# The name of the directory in which libraries will be kept.
|
||||
lib_dir='lib'
|
||||
|
||||
# The install prefix flag.
|
||||
install_prefix_def="${HOME}/blis"
|
||||
install_prefix=''
|
||||
prefix_flag=''
|
||||
|
||||
|
||||
# Query which version of BLIS this is.
|
||||
version=$(cat "${version_file}")
|
||||
|
||||
|
||||
# Process our command line options.
|
||||
while getopts ":hp:q" opt; do
|
||||
case $opt in
|
||||
@@ -123,6 +140,10 @@ main()
|
||||
shift $(($OPTIND - 1))
|
||||
|
||||
|
||||
# Initial message.
|
||||
echo "${script_name}: starting configuration of BLIS ${version}."
|
||||
|
||||
|
||||
# Set config_name based on the number of arguments leftover (after command
|
||||
# line option processing).
|
||||
if [ $# = "0" ]; then
|
||||
@@ -140,7 +161,7 @@ main()
|
||||
fi
|
||||
|
||||
# Build the config directory path
|
||||
config_path="${config_dir}/${config_name}"
|
||||
config_path="./${config_dir}/${config_name}"
|
||||
|
||||
|
||||
# Set the install prefix if it was not already set when parsing the install
|
||||
@@ -168,7 +189,7 @@ main()
|
||||
|
||||
|
||||
# Create obj sub-directories (if they do not already exist).
|
||||
base_obj_path="${obj_dir}/${config_name}"
|
||||
base_obj_path="./${obj_dir}/${config_name}"
|
||||
|
||||
echo "${script_name}: creating ${base_obj_path}"
|
||||
mkdir -p ${base_obj_path}
|
||||
@@ -184,19 +205,25 @@ main()
|
||||
mkdir -p ${obj_frame_path}
|
||||
|
||||
|
||||
# Mirror source trees to the object directory.
|
||||
echo "${script_name}: mirroring ${config_path} to ${obj_config_path}"
|
||||
# Create lib directory (if it does not already exist).
|
||||
base_lib_path="./${lib_dir}/${config_name}"
|
||||
|
||||
echo "${script_name}: creating ${base_lib_path}"
|
||||
mkdir -p ${base_lib_path}
|
||||
|
||||
|
||||
# Mirror config source tree to the object config sub-directory.
|
||||
echo "${script_name}: mirroring ${config_path} to ${obj_config_path}"
|
||||
${mirror_tree_sh} ${config_path} ${obj_config_path}
|
||||
|
||||
# Mirror frame source tree to the object frame sub-directory.
|
||||
echo "${script_name}: mirroring ${frame_path} to ${obj_frame_path}"
|
||||
|
||||
${mirror_tree_sh} ${frame_path} ${obj_frame_path}
|
||||
exit 1
|
||||
|
||||
|
||||
# Generate makefile fragments in the chosen configuration directory.
|
||||
./build/gen-make-frags/gen-make-frag.sh \
|
||||
-h -r -d -v1 \
|
||||
-h -r -v1 \
|
||||
-o ${script_name} \
|
||||
-p 'CONFIG' \
|
||||
${config_path} \
|
||||
@@ -207,7 +234,7 @@ exit 1
|
||||
|
||||
# Generate makefile fragments in the framework directory.
|
||||
./build/gen-make-frags/gen-make-frag.sh \
|
||||
-h -r -d -v1 \
|
||||
-h -r -v1 \
|
||||
-o ${script_name} \
|
||||
-p 'FRAME' \
|
||||
${frame_path} \
|
||||
|
||||
@@ -62,7 +62,7 @@ void PASTEMAC(ch,opname)( \
|
||||
y, incy ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( axpyf, axpyf_opt_var1 )
|
||||
INSERT_GENTFUNC_BASIC( axpyf, AXPYF_KERNEL )
|
||||
|
||||
|
||||
//
|
||||
@@ -94,13 +94,13 @@ void PASTEMAC3(cha,chx,chy,opname)( \
|
||||
|
||||
// Define the basic set of functions unconditionally, and then also some
|
||||
// mixed datatype functions if requested.
|
||||
INSERT_GENTFUNC3U12_BASIC( axpyf, axpyf_opt_var1 )
|
||||
INSERT_GENTFUNC3U12_BASIC( axpyf, AXPYF_KERNEL )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_D( axpyf, axpyf_opt_var1 )
|
||||
INSERT_GENTFUNC3U12_MIX_D( axpyf, AXPYF_KERNEL )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_P( axpyf, axpyf_opt_var1 )
|
||||
INSERT_GENTFUNC3U12_MIX_P( axpyf, AXPYF_KERNEL )
|
||||
#endif
|
||||
|
||||
|
||||
@@ -35,6 +35,24 @@
|
||||
#include "bl2_axpyf_unb_var1.h"
|
||||
|
||||
|
||||
//
|
||||
// Define fusing factors (if they are not already defined by the user
|
||||
// in bl2_kernel.h).
|
||||
//
|
||||
#ifndef bl2_saxpyf_fuse_fac
|
||||
#define bl2_saxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_S
|
||||
#endif
|
||||
#ifndef bl2_daxpyf_fuse_fac
|
||||
#define bl2_daxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_D
|
||||
#endif
|
||||
#ifndef bl2_caxpyf_fuse_fac
|
||||
#define bl2_caxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_C
|
||||
#endif
|
||||
#ifndef bl2_zaxpyf_fuse_fac
|
||||
#define bl2_zaxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_Z
|
||||
#endif
|
||||
|
||||
|
||||
//
|
||||
// Prototype BLAS-like interfaces with homogeneous-typed operands.
|
||||
//
|
||||
|
||||
@@ -72,7 +72,7 @@ void PASTEMAC(ch,opname)( \
|
||||
z, incz ); \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( dotxaxpyf, dotxaxpyf_opt_var1 )
|
||||
INSERT_GENTFUNC_BASIC( dotxaxpyf, DOTXAXPYF_KERNEL )
|
||||
|
||||
|
||||
//
|
||||
@@ -114,13 +114,13 @@ void PASTEMAC3(cha,chb,chc,opname)( \
|
||||
|
||||
// Define the basic set of functions unconditionally, and then also some
|
||||
// mixed datatype functions if requested.
|
||||
INSERT_GENTFUNC3U12_BASIC( dotxaxpyf, dotxaxpyf_opt_var1 )
|
||||
INSERT_GENTFUNC3U12_BASIC( dotxaxpyf, DOTXAXPYF_KERNEL )
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_D( dotxaxpyf, dotxaxpyf_opt_var1 )
|
||||
INSERT_GENTFUNC3U12_MIX_D( dotxaxpyf, DOTXAXPYF_KERNEL )
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
|
||||
INSERT_GENTFUNC3U12_MIX_P( dotxaxpyf, dotxaxpyf_opt_var1 )
|
||||
INSERT_GENTFUNC3U12_MIX_P( dotxaxpyf, DOTXAXPYF_KERNEL )
|
||||
#endif
|
||||
|
||||
|
||||
@@ -35,6 +35,24 @@
|
||||
#include "bl2_dotxaxpyf_unb_var1.h"
|
||||
|
||||
|
||||
//
|
||||
// Define fusing factors (if they are not already defined by the user
|
||||
// in bl2_kernel.h).
|
||||
//
|
||||
#ifndef bl2_sdotxaxpyf_fuse_fac
|
||||
#define bl2_sdotxaxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_S
|
||||
#endif
|
||||
#ifndef bl2_ddotxaxpyf_fuse_fac
|
||||
#define bl2_ddotxaxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_D
|
||||
#endif
|
||||
#ifndef bl2_cdotxaxpyf_fuse_fac
|
||||
#define bl2_cdotxaxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_C
|
||||
#endif
|
||||
#ifndef bl2_zdotxaxpyf_fuse_fac
|
||||
#define bl2_zdotxaxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_Z
|
||||
#endif
|
||||
|
||||
|
||||
//
|
||||
// Prototype BLAS-like interfaces with homogeneous-typed operands.
|
||||
//
|
||||
|
||||
@@ -35,6 +35,24 @@
|
||||
#include "bl2_dotxf_unb_var1.h"
|
||||
|
||||
|
||||
//
|
||||
// Define fusing factors (if they are not already defined by the user
|
||||
// in bl2_kernel.h).
|
||||
//
|
||||
#ifndef bl2_sdotxf_fuse_fac
|
||||
#define bl2_sdotxf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_S
|
||||
#endif
|
||||
#ifndef bl2_ddotxf_fuse_fac
|
||||
#define bl2_ddotxf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_D
|
||||
#endif
|
||||
#ifndef bl2_cdotxf_fuse_fac
|
||||
#define bl2_cdotxf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_C
|
||||
#endif
|
||||
#ifndef bl2_zdotxf_fuse_fac
|
||||
#define bl2_zdotxf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_Z
|
||||
#endif
|
||||
|
||||
|
||||
//
|
||||
// Prototype BLAS-like interfaces with homogeneous-typed operands.
|
||||
//
|
||||
|
||||
@@ -44,94 +44,102 @@ typedef void (*FUNCPTR_T)(
|
||||
void* p
|
||||
);
|
||||
|
||||
static FUNCPTR_T ftypes[10][BLIS_NUM_FP_TYPES] =
|
||||
{
|
||||
// panel width = 0
|
||||
{
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
},
|
||||
// panel width = 1
|
||||
{
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
},
|
||||
// panel width = 2
|
||||
{
|
||||
PASTEMAC(s,packm_2xk),
|
||||
PASTEMAC(c,packm_2xk),
|
||||
PASTEMAC(d,packm_2xk),
|
||||
PASTEMAC(z,packm_2xk),
|
||||
},
|
||||
// panel width = 3
|
||||
{
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
},
|
||||
// panel width = 4
|
||||
{
|
||||
PASTEMAC(s,packm_4xk),
|
||||
PASTEMAC(c,packm_4xk),
|
||||
PASTEMAC(d,packm_4xk),
|
||||
PASTEMAC(z,packm_4xk),
|
||||
},
|
||||
// panel width = 5
|
||||
{
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
},
|
||||
// panel width = 6
|
||||
{
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
},
|
||||
// panel width = 7
|
||||
{
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
},
|
||||
// panel width = 8
|
||||
{
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
},
|
||||
// panel width = 9
|
||||
{
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
}
|
||||
|
||||
#undef GENARRAY
|
||||
#define GENARRAY( kername2, kername4, kername6, kername8 ) \
|
||||
\
|
||||
static FUNCPTR_T ftypes[10][BLIS_NUM_FP_TYPES] = \
|
||||
{ \
|
||||
/* panel width = 0 */ \
|
||||
{ \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
}, \
|
||||
/* panel width = 1 */ \
|
||||
{ \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
}, \
|
||||
/* panel width = 2 */ \
|
||||
{ \
|
||||
PASTEMAC(s,kername2), \
|
||||
PASTEMAC(c,kername2), \
|
||||
PASTEMAC(d,kername2), \
|
||||
PASTEMAC(z,kername2), \
|
||||
}, \
|
||||
/* panel width = 3 */ \
|
||||
{ \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
}, \
|
||||
/* panel width = 4 */ \
|
||||
{ \
|
||||
PASTEMAC(s,kername4), \
|
||||
PASTEMAC(c,kername4), \
|
||||
PASTEMAC(d,kername4), \
|
||||
PASTEMAC(z,kername4), \
|
||||
}, \
|
||||
/* panel width = 5 */ \
|
||||
{ \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
}, \
|
||||
/* panel width = 6 */ \
|
||||
{ \
|
||||
PASTEMAC(s,kername6), \
|
||||
PASTEMAC(c,kername6), \
|
||||
PASTEMAC(d,kername6), \
|
||||
PASTEMAC(z,kername6), \
|
||||
}, \
|
||||
/* panel width = 7 */ \
|
||||
{ \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
}, \
|
||||
/* panel width = 8 */ \
|
||||
{ \
|
||||
PASTEMAC(s,kername8), \
|
||||
PASTEMAC(c,kername8), \
|
||||
PASTEMAC(d,kername8), \
|
||||
PASTEMAC(z,kername8), \
|
||||
}, \
|
||||
/* panel width = 9 */ \
|
||||
{ \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
} \
|
||||
};
|
||||
|
||||
GENARRAY( PACKM_2XK_KERNEL,
|
||||
PACKM_4XK_KERNEL,
|
||||
PACKM_6XK_KERNEL,
|
||||
PACKM_8XK_KERNEL )
|
||||
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname, varname ) \
|
||||
#define GENTFUNC( ctype, ch, opname, copyvker ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname)( \
|
||||
conj_t conja, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
void* beta, \
|
||||
void* a, inc_t inca, inc_t lda, \
|
||||
void* p, inc_t ldp \
|
||||
) \
|
||||
void PASTEMAC(ch,opname)( \
|
||||
conj_t conja, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
void* beta, \
|
||||
void* a, inc_t inca, inc_t lda, \
|
||||
void* p, inc_t ldp \
|
||||
) \
|
||||
{ \
|
||||
dim_t panel_dim; \
|
||||
num_t dt; \
|
||||
@@ -145,6 +153,17 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
/* Index into the array to extract the correct function pointer. */ \
|
||||
f = ftypes[panel_dim][dt]; \
|
||||
\
|
||||
/* If the panel dimension is unit, then we recognize that this allows
|
||||
the kernel to reduce to a copyv, so we call that kernel directly. */ \
|
||||
if ( m == 1 ) \
|
||||
{ \
|
||||
PASTEMAC2(ch,ch,copyvker)( conja, \
|
||||
n, \
|
||||
a, lda, \
|
||||
p, 1 ); \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
/* If there exists a kernel implementation for the panel dimension
|
||||
provided, and the "width" of the panel is equal to the leading
|
||||
@@ -174,5 +193,5 @@ void PASTEMAC(ch,varname)( \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( packm_cxk, packm_cxk )
|
||||
INSERT_GENTFUNC_BASIC( packm_cxk, COPYV_KERNEL )
|
||||
|
||||
@@ -32,9 +32,11 @@
|
||||
|
||||
*/
|
||||
|
||||
// Include headers for various packm micro-kernels.
|
||||
#include "bl2_packm_2xk.h"
|
||||
#include "bl2_packm_4xk.h"
|
||||
// Include headers for various packm kernels.
|
||||
#include "bl2_packm_ref_2xk.h"
|
||||
#include "bl2_packm_ref_4xk.h"
|
||||
#include "bl2_packm_ref_6xk.h"
|
||||
#include "bl2_packm_ref_8xk.h"
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
@@ -42,9 +42,11 @@ void PASTEMAC(ch,varname)( \
|
||||
dim_t n, \
|
||||
void* beta, \
|
||||
void* a, inc_t inca, inc_t lda, \
|
||||
void* p \
|
||||
void* p \
|
||||
) \
|
||||
{ \
|
||||
const inc_t ldp = 2; \
|
||||
\
|
||||
ctype* restrict beta_cast = beta; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
@@ -59,7 +61,7 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += 2; \
|
||||
pi1 += ldp; \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
@@ -70,7 +72,7 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += 2; \
|
||||
pi1 += ldp; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
@@ -84,7 +86,7 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += 2; \
|
||||
pi1 += ldp; \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
@@ -95,11 +97,11 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += 2; \
|
||||
pi1 += ldp; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( packm_2xk, packm_2xk )
|
||||
INSERT_GENTFUNC_BASIC( packm_ref_6xk, packm_ref_6xk )
|
||||
|
||||
@@ -43,4 +43,4 @@ void PASTEMAC(ch,varname)( \
|
||||
void* p \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( packm_2xk )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_2xk )
|
||||
@@ -45,6 +45,8 @@ void PASTEMAC(ch,varname)( \
|
||||
void* p \
|
||||
) \
|
||||
{ \
|
||||
const inc_t ldp = 4; \
|
||||
\
|
||||
ctype* restrict beta_cast = beta; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
@@ -61,7 +63,7 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += 4; \
|
||||
pi1 += ldp; \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
@@ -74,7 +76,7 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += 4; \
|
||||
pi1 += ldp; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
@@ -90,7 +92,7 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += 4; \
|
||||
pi1 += ldp; \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
@@ -103,11 +105,11 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += 4; \
|
||||
pi1 += ldp; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( packm_4xk, packm_4xk )
|
||||
INSERT_GENTFUNC_BASIC( packm_ref_6xk, packm_ref_6xk )
|
||||
|
||||
@@ -43,4 +43,4 @@ void PASTEMAC(ch,varname)( \
|
||||
void* p \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( packm_2xk )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_4xk )
|
||||
@@ -45,6 +45,8 @@ void PASTEMAC(ch,varname)( \
|
||||
void* p \
|
||||
) \
|
||||
{ \
|
||||
const inc_t ldp = 6; \
|
||||
\
|
||||
ctype* restrict beta_cast = beta; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
@@ -59,9 +61,11 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += 4; \
|
||||
pi1 += ldp; \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
@@ -72,9 +76,11 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += 4; \
|
||||
pi1 += ldp; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
@@ -88,9 +94,11 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += 4; \
|
||||
pi1 += ldp; \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
@@ -101,13 +109,15 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += 4; \
|
||||
pi1 += ldp; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( packm_4xk, packm_4xk )
|
||||
INSERT_GENTFUNC_BASIC( packm_ref_6xk, packm_ref_6xk )
|
||||
|
||||
@@ -43,4 +43,4 @@ void PASTEMAC(ch,varname)( \
|
||||
void* p \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( packm_4xk )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_6xk )
|
||||
@@ -42,9 +42,11 @@ void PASTEMAC(ch,varname)( \
|
||||
dim_t n, \
|
||||
void* beta, \
|
||||
void* a, inc_t inca, inc_t lda, \
|
||||
void* p \
|
||||
void* p \
|
||||
) \
|
||||
{ \
|
||||
const inc_t ldp = 8; \
|
||||
\
|
||||
ctype* restrict beta_cast = beta; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
ctype* restrict pi1 = p; \
|
||||
@@ -57,9 +59,15 @@ void PASTEMAC(ch,varname)( \
|
||||
{ \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += 2; \
|
||||
pi1 += ldp; \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
@@ -68,9 +76,15 @@ void PASTEMAC(ch,varname)( \
|
||||
{ \
|
||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += 2; \
|
||||
pi1 += ldp; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
@@ -82,9 +96,15 @@ void PASTEMAC(ch,varname)( \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += 2; \
|
||||
pi1 += ldp; \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
@@ -93,13 +113,19 @@ void PASTEMAC(ch,varname)( \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
|
||||
\
|
||||
alpha1 += lda; \
|
||||
pi1 += 2; \
|
||||
pi1 += ldp; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( packm_2xk, packm_2xk )
|
||||
INSERT_GENTFUNC_BASIC( packm_ref_6xk, packm_ref_6xk )
|
||||
|
||||
@@ -43,4 +43,4 @@ void PASTEMAC(ch,varname)( \
|
||||
void* p \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( packm_4xk )
|
||||
INSERT_GENTPROT_BASIC( packm_ref_8xk )
|
||||
@@ -1,97 +0,0 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# BLIS
|
||||
# An object-based framework for developing high-performance BLAS-like
|
||||
# libraries.
|
||||
#
|
||||
# Copyright (C) 2012, The University of Texas
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# - Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# - Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# - Neither the name of The University of Texas nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
main()
|
||||
{
|
||||
arch_dir="$1"
|
||||
|
||||
# Usage:
|
||||
# Example 1: ./set_symlink.sh generic
|
||||
# Example 2: ./set_symlink.sh x86
|
||||
|
||||
if [ ! -d "${arch_dir}" ]; then
|
||||
echo "$0: Directory for architecture ${arch_dir} does not exist!. Exiting."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
kernels="bl2_packm_2xk bl2_packm_4xk"
|
||||
|
||||
for t in $kernels; do
|
||||
|
||||
# Build the filename for the current kernel type.
|
||||
oldlink_c="${t}.c"
|
||||
oldlink_h="${t}.h"
|
||||
|
||||
# Verify that symlinks to the current kernel type exist; if so, remove them.
|
||||
if [ ! -f "${oldlink_c}" ]; then
|
||||
echo "$0: File ${oldlink_c} does not exist. Skipping removal."
|
||||
else
|
||||
echo "$0: removing ${oldlink_c}"
|
||||
rm -f ${oldlink_c}
|
||||
fi
|
||||
|
||||
if [ ! -f "${oldlink_h}" ]; then
|
||||
echo "$0: File ${oldlink_h} does not exist. Skipping removal."
|
||||
else
|
||||
echo "$0: removing ${oldlink_h}"
|
||||
rm -f ${oldlink_h}
|
||||
fi
|
||||
|
||||
# Build the filename for the current kernel type in the desired
|
||||
# directory.
|
||||
newfile_c="${arch_dir}/${t}.c"
|
||||
newfile_h="${arch_dir}/${t}.h"
|
||||
|
||||
# Verify that the files exist; if so, symlink them.
|
||||
if [ ! -f "${newfile_c}" ]; then
|
||||
echo "$0: Attempted to symlink file ${newfile_c}, which does not exist! Skipping."
|
||||
else
|
||||
echo "$0: symlinking (and touching) ${newfile_c}"
|
||||
ln -s ${newfile_c}
|
||||
touch ${newfile_c}
|
||||
fi
|
||||
|
||||
if [ ! -f "${newfile_h}" ]; then
|
||||
echo "$0: Attempted to symlink file ${newfile_h}, which does not exist! Skipping."
|
||||
else
|
||||
echo "$0: symlinking ${newfile_h}"
|
||||
ln -s ${newfile_h}
|
||||
fi
|
||||
|
||||
done
|
||||
}
|
||||
|
||||
main "$@"
|
||||
|
||||
@@ -44,94 +44,103 @@ typedef void (*FUNCPTR_T)(
|
||||
void* a, inc_t inca, inc_t lda
|
||||
);
|
||||
|
||||
static FUNCPTR_T ftypes[10][BLIS_NUM_FP_TYPES] =
|
||||
{
|
||||
// panel width = 0
|
||||
{
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
},
|
||||
// panel width = 1
|
||||
{
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
},
|
||||
// panel width = 2
|
||||
{
|
||||
PASTEMAC(s,unpackm_2xk),
|
||||
PASTEMAC(c,unpackm_2xk),
|
||||
PASTEMAC(d,unpackm_2xk),
|
||||
PASTEMAC(z,unpackm_2xk),
|
||||
},
|
||||
// panel width = 3
|
||||
{
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
},
|
||||
// panel width = 4
|
||||
{
|
||||
PASTEMAC(s,unpackm_4xk),
|
||||
PASTEMAC(c,unpackm_4xk),
|
||||
PASTEMAC(d,unpackm_4xk),
|
||||
PASTEMAC(z,unpackm_4xk),
|
||||
},
|
||||
// panel width = 5
|
||||
{
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
},
|
||||
// panel width = 6
|
||||
{
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
},
|
||||
// panel width = 7
|
||||
{
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
},
|
||||
// panel width = 8
|
||||
{
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
},
|
||||
// panel width = 9
|
||||
{
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
}
|
||||
|
||||
#undef GENARRAY
|
||||
#define GENARRAY( kername2, kername4, kername6, kername8 ) \
|
||||
\
|
||||
static FUNCPTR_T ftypes[10][BLIS_NUM_FP_TYPES] = \
|
||||
{ \
|
||||
/* panel width = 0 */ \
|
||||
{ \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
}, \
|
||||
/* panel width = 1 */ \
|
||||
{ \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
}, \
|
||||
/* panel width = 2 */ \
|
||||
{ \
|
||||
PASTEMAC(s,kername2), \
|
||||
PASTEMAC(c,kername2), \
|
||||
PASTEMAC(d,kername2), \
|
||||
PASTEMAC(z,kername2), \
|
||||
}, \
|
||||
/* panel width = 3 */ \
|
||||
{ \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
}, \
|
||||
/* panel width = 4 */ \
|
||||
{ \
|
||||
PASTEMAC(s,kername4), \
|
||||
PASTEMAC(c,kername4), \
|
||||
PASTEMAC(d,kername4), \
|
||||
PASTEMAC(z,kername4), \
|
||||
}, \
|
||||
/* panel width = 5 */ \
|
||||
{ \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
}, \
|
||||
/* panel width = 6 */ \
|
||||
{ \
|
||||
PASTEMAC(s,kername6), \
|
||||
PASTEMAC(c,kername6), \
|
||||
PASTEMAC(d,kername6), \
|
||||
PASTEMAC(z,kername6), \
|
||||
}, \
|
||||
/* panel width = 7 */ \
|
||||
{ \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
}, \
|
||||
/* panel width = 8 */ \
|
||||
{ \
|
||||
PASTEMAC(s,kername8), \
|
||||
PASTEMAC(c,kername8), \
|
||||
PASTEMAC(d,kername8), \
|
||||
PASTEMAC(z,kername8), \
|
||||
}, \
|
||||
/* panel width = 9 */ \
|
||||
{ \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
NULL, \
|
||||
} \
|
||||
};
|
||||
|
||||
GENARRAY( UNPACKM_2XK_KERNEL,
|
||||
UNPACKM_4XK_KERNEL,
|
||||
UNPACKM_6XK_KERNEL,
|
||||
UNPACKM_8XK_KERNEL )
|
||||
|
||||
|
||||
|
||||
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC( ctype, ch, opname, varname ) \
|
||||
#define GENTFUNC( ctype, ch, opname, copyvker ) \
|
||||
\
|
||||
void PASTEMAC(ch,varname)( \
|
||||
conj_t conjp, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
void* beta, \
|
||||
void* p, inc_t ldp, \
|
||||
void* a, inc_t inca, inc_t lda \
|
||||
) \
|
||||
void PASTEMAC(ch,opname)( \
|
||||
conj_t conjp, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
void* beta, \
|
||||
void* p, inc_t ldp, \
|
||||
void* a, inc_t inca, inc_t lda \
|
||||
) \
|
||||
{ \
|
||||
dim_t panel_dim; \
|
||||
num_t dt; \
|
||||
@@ -145,6 +154,17 @@ void PASTEMAC(ch,varname)( \
|
||||
\
|
||||
/* Index into the array to extract the correct function pointer. */ \
|
||||
f = ftypes[panel_dim][dt]; \
|
||||
\
|
||||
/* If the panel dimension is unit, then we recognize that this allows
|
||||
the kernel to reduce to a copyv, so we call that kernel directly. */ \
|
||||
if ( m == 1 ) \
|
||||
{ \
|
||||
PASTEMAC2(ch,ch,copyvker)( conjp, \
|
||||
n, \
|
||||
p, 1, \
|
||||
a, lda ); \
|
||||
return; \
|
||||
} \
|
||||
\
|
||||
/* If there exists a kernel implementation for the panel dimension
|
||||
provided, and the "width" of the panel is equal to the leading
|
||||
@@ -174,5 +194,5 @@ void PASTEMAC(ch,varname)( \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( unpackm_cxk, unpackm_cxk )
|
||||
INSERT_GENTFUNC_BASIC( unpackm_cxk, COPYV_KERNEL )
|
||||
|
||||
@@ -32,9 +32,11 @@
|
||||
|
||||
*/
|
||||
|
||||
// Include headers for various unpackm micro-kernels.
|
||||
#include "bl2_unpackm_2xk.h"
|
||||
#include "bl2_unpackm_4xk.h"
|
||||
// Include headers for various unpackm kernels.
|
||||
#include "bl2_unpackm_ref_2xk.h"
|
||||
#include "bl2_unpackm_ref_4xk.h"
|
||||
#include "bl2_unpackm_ref_6xk.h"
|
||||
#include "bl2_unpackm_ref_8xk.h"
|
||||
|
||||
|
||||
#undef GENTPROT
|
||||
@@ -45,6 +45,8 @@ void PASTEMAC(ch,varname)( \
|
||||
void* a, inc_t inca, inc_t lda \
|
||||
) \
|
||||
{ \
|
||||
const inc_t ldp = 2; \
|
||||
\
|
||||
ctype* restrict beta_cast = beta; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
@@ -58,7 +60,7 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
\
|
||||
pi1 += 2; \
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
} \
|
||||
} \
|
||||
@@ -69,7 +71,7 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
\
|
||||
pi1 += 2; \
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
} \
|
||||
} \
|
||||
@@ -83,7 +85,7 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
\
|
||||
pi1 += 2; \
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
} \
|
||||
} \
|
||||
@@ -94,12 +96,12 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
\
|
||||
pi1 += 2; \
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( unpackm_2xk, unpackm_2xk )
|
||||
INSERT_GENTFUNC_BASIC( unpackm_ref_4xk, unpackm_ref_4xk )
|
||||
|
||||
@@ -43,4 +43,4 @@ void PASTEMAC(ch,varname)( \
|
||||
void* a, inc_t inca, inc_t lda \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( unpackm_2xk )
|
||||
INSERT_GENTPROT_BASIC( unpackm_ref_2xk )
|
||||
@@ -45,6 +45,8 @@ void PASTEMAC(ch,varname)( \
|
||||
void* a, inc_t inca, inc_t lda \
|
||||
) \
|
||||
{ \
|
||||
const inc_t ldp = 4; \
|
||||
\
|
||||
ctype* restrict beta_cast = beta; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
@@ -60,7 +62,7 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
\
|
||||
pi1 += 4; \
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
} \
|
||||
} \
|
||||
@@ -73,7 +75,7 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
\
|
||||
pi1 += 4; \
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
} \
|
||||
} \
|
||||
@@ -89,7 +91,7 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
\
|
||||
pi1 += 4; \
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
} \
|
||||
} \
|
||||
@@ -102,12 +104,12 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
\
|
||||
pi1 += 4; \
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( unpackm_4xk, unpackm_4xk )
|
||||
INSERT_GENTFUNC_BASIC( unpackm_ref_4xk, unpackm_ref_4xk )
|
||||
|
||||
@@ -43,4 +43,4 @@ void PASTEMAC(ch,varname)( \
|
||||
void* a, inc_t inca, inc_t lda \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( unpackm_4xk )
|
||||
INSERT_GENTPROT_BASIC( unpackm_ref_4xk )
|
||||
@@ -45,6 +45,8 @@ void PASTEMAC(ch,varname)( \
|
||||
void* a, inc_t inca, inc_t lda \
|
||||
) \
|
||||
{ \
|
||||
const inc_t ldp = 6; \
|
||||
\
|
||||
ctype* restrict beta_cast = beta; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
@@ -59,8 +61,10 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
\
|
||||
pi1 += 4; \
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
} \
|
||||
} \
|
||||
@@ -72,8 +76,10 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
\
|
||||
pi1 += 4; \
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
} \
|
||||
} \
|
||||
@@ -88,8 +94,10 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
\
|
||||
pi1 += 4; \
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
} \
|
||||
} \
|
||||
@@ -101,13 +109,15 @@ void PASTEMAC(ch,varname)( \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
\
|
||||
pi1 += 4; \
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( unpackm_4xk, unpackm_4xk )
|
||||
INSERT_GENTFUNC_BASIC( unpackm_ref_6xk, unpackm_ref_6xk )
|
||||
|
||||
@@ -43,4 +43,4 @@ void PASTEMAC(ch,varname)( \
|
||||
void* a, inc_t inca, inc_t lda \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( unpackm_2xk )
|
||||
INSERT_GENTPROT_BASIC( unpackm_ref_6xk )
|
||||
@@ -45,6 +45,8 @@ void PASTEMAC(ch,varname)( \
|
||||
void* a, inc_t inca, inc_t lda \
|
||||
) \
|
||||
{ \
|
||||
const inc_t ldp = 8; \
|
||||
\
|
||||
ctype* restrict beta_cast = beta; \
|
||||
ctype* restrict pi1 = p; \
|
||||
ctype* restrict alpha1 = a; \
|
||||
@@ -57,8 +59,14 @@ void PASTEMAC(ch,varname)( \
|
||||
{ \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
\
|
||||
pi1 += 2; \
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
} \
|
||||
} \
|
||||
@@ -68,8 +76,14 @@ void PASTEMAC(ch,varname)( \
|
||||
{ \
|
||||
PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
\
|
||||
pi1 += 2; \
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
} \
|
||||
} \
|
||||
@@ -82,8 +96,14 @@ void PASTEMAC(ch,varname)( \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
\
|
||||
pi1 += 2; \
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
} \
|
||||
} \
|
||||
@@ -93,13 +113,19 @@ void PASTEMAC(ch,varname)( \
|
||||
{ \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
|
||||
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
|
||||
\
|
||||
pi1 += 2; \
|
||||
pi1 += ldp; \
|
||||
alpha1 += lda; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BASIC( unpackm_2xk, unpackm_2xk )
|
||||
INSERT_GENTFUNC_BASIC( unpackm_ref_6xk, unpackm_ref_6xk )
|
||||
|
||||
@@ -43,4 +43,4 @@ void PASTEMAC(ch,varname)( \
|
||||
void* a, inc_t inca, inc_t lda \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( unpackm_4xk )
|
||||
INSERT_GENTPROT_BASIC( unpackm_ref_8xk )
|
||||
@@ -1,97 +0,0 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# BLIS
|
||||
# An object-based framework for developing high-performance BLAS-like
|
||||
# libraries.
|
||||
#
|
||||
# Copyright (C) 2012, The University of Texas
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# - Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# - Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# - Neither the name of The University of Texas nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#
|
||||
#
|
||||
|
||||
main()
|
||||
{
|
||||
arch_dir="$1"
|
||||
|
||||
# Usage:
|
||||
# Example 1: ./set_symlink.sh generic
|
||||
# Example 2: ./set_symlink.sh x86
|
||||
|
||||
if [ ! -d "${arch_dir}" ]; then
|
||||
echo "$0: Directory for architecture ${arch_dir} does not exist!. Exiting."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
kernels="bl2_unpackm_2xk bl2_unpackm_4xk"
|
||||
|
||||
for t in $kernels; do
|
||||
|
||||
# Build the filename for the current kernel type.
|
||||
oldlink_c="${t}.c"
|
||||
oldlink_h="${t}.h"
|
||||
|
||||
# Verify that symlinks to the current kernel type exist; if so, remove them.
|
||||
if [ ! -f "${oldlink_c}" ]; then
|
||||
echo "$0: File ${oldlink_c} does not exist. Skipping removal."
|
||||
else
|
||||
echo "$0: removing ${oldlink_c}"
|
||||
rm -f ${oldlink_c}
|
||||
fi
|
||||
|
||||
if [ ! -f "${oldlink_h}" ]; then
|
||||
echo "$0: File ${oldlink_h} does not exist. Skipping removal."
|
||||
else
|
||||
echo "$0: removing ${oldlink_h}"
|
||||
rm -f ${oldlink_h}
|
||||
fi
|
||||
|
||||
# Build the filename for the current kernel type in the desired
|
||||
# directory.
|
||||
newfile_c="${arch_dir}/${t}.c"
|
||||
newfile_h="${arch_dir}/${t}.h"
|
||||
|
||||
# Verify that the files exist; if so, symlink them.
|
||||
if [ ! -f "${newfile_c}" ]; then
|
||||
echo "$0: Attempted to symlink file ${newfile_c}, which does not exist! Skipping."
|
||||
else
|
||||
echo "$0: symlinking (and touching) ${newfile_c}"
|
||||
ln -s ${newfile_c}
|
||||
touch ${newfile_c}
|
||||
fi
|
||||
|
||||
if [ ! -f "${newfile_h}" ]; then
|
||||
echo "$0: Attempted to symlink file ${newfile_h}, which does not exist! Skipping."
|
||||
else
|
||||
echo "$0: symlinking ${newfile_h}"
|
||||
ln -s ${newfile_h}
|
||||
fi
|
||||
|
||||
done
|
||||
}
|
||||
|
||||
main "$@"
|
||||
|
||||
@@ -37,24 +37,18 @@
|
||||
#define FUNCPTR_T machval_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)(
|
||||
machval_t machval,
|
||||
void* val
|
||||
machval_t mval,
|
||||
void* v
|
||||
);
|
||||
|
||||
// Manually initialize a function pointer array.
|
||||
static FUNCPTR_T ftypes[BLIS_NUM_FP_TYPES] =
|
||||
{
|
||||
bl2_smachval,
|
||||
NULL,
|
||||
bl2_dmachval,
|
||||
NULL
|
||||
};
|
||||
static FUNCPTR_T GENARRAY(ftypes,machval);
|
||||
|
||||
|
||||
|
||||
//
|
||||
// Define object-based interface.
|
||||
//
|
||||
void bl2_machval( machval_t machval,
|
||||
void bl2_machval( machval_t mval,
|
||||
obj_t* v )
|
||||
{
|
||||
num_t dt_v = bl2_obj_datatype( *v );
|
||||
@@ -67,7 +61,7 @@ void bl2_machval( machval_t machval,
|
||||
f = ftypes[dt_v];
|
||||
|
||||
// Invoke the function.
|
||||
f( machval,
|
||||
f( mval,
|
||||
buf_v );
|
||||
}
|
||||
|
||||
@@ -75,34 +69,37 @@ void bl2_machval( machval_t machval,
|
||||
//
|
||||
// Define BLAS-like interfaces.
|
||||
//
|
||||
#undef GENTFUNC
|
||||
#define GENTFUNC3( ctype, ctype_r, ch, chr, opname, varname ) \
|
||||
#undef GENTFUNC1R
|
||||
#define GENTFUNC1R( ctype, ctype_r, ch, chr, opname, varname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname)( \
|
||||
machvar_t machval, \
|
||||
ctype* val, \
|
||||
machval_t mval, \
|
||||
void* v \
|
||||
) \
|
||||
{ \
|
||||
static ctype_r pvals[ BLIS_NUM_MACH_PARAMS ]; \
|
||||
\
|
||||
static bool_t first_time = TRUE; \
|
||||
dim_t val_i = machval - BLIS_MACH_PARAM_FIRST; \
|
||||
\
|
||||
dim_t val_i = mval - BLIS_MACH_PARAM_FIRST; \
|
||||
ctype* v_cast = v; \
|
||||
\
|
||||
/* If this is the first time through, call the underlying
|
||||
code to discover each machine parameter. */ \
|
||||
if ( first_time ) \
|
||||
{ \
|
||||
char lapack_machval; \
|
||||
dim_t i; \
|
||||
char lapack_mval; \
|
||||
dim_t m, i; \
|
||||
\
|
||||
for( m = BLIS_MACH_PARAM_FIRST, i = 0; \
|
||||
m <= BLIS_MACH_PARAM_LAST; \
|
||||
++m, ++i ) \
|
||||
for( i = 0, m = BLIS_MACH_PARAM_FIRST; \
|
||||
i < BLIS_NUM_MACH_PARAMS - 1; \
|
||||
++i, ++m ) \
|
||||
{ \
|
||||
bl2_param_map_to_netlib_machval( m, &lapack_machval ); \
|
||||
bl2_param_map_blis_to_netlib_machval( m, &lapack_mval ); \
|
||||
\
|
||||
/*printf( "bl2_machval: querying %u %c\n", m, lapack_machval );*/ \
|
||||
/*printf( "bl2_machval: querying %u %c\n", m, lapack_mval );*/ \
|
||||
\
|
||||
pvals[i] = PASTEMAC(chr,varname)( &lapack_machval, 1 ); \
|
||||
pvals[i] = PASTEMAC(chr,varname)( &lapack_mval, 1 ); \
|
||||
\
|
||||
/*printf( "bl2_machval: got back %34.29e\n", pvals[i] ); */ \
|
||||
} \
|
||||
@@ -116,12 +113,11 @@ void PASTEMAC(ch,opname)( \
|
||||
/* Copy the requested parameter value to the output buffer, which
|
||||
may involve a demotion from the complex to real domain. */ \
|
||||
PASTEMAC2(chr,ch,copys)( pvals[ val_i ], \
|
||||
*val ); \
|
||||
*v_cast ); \
|
||||
}
|
||||
|
||||
|
||||
GENTFUNC( float, float, s, s, machval, lamch )
|
||||
GENTFUNC( double, double, d, d, machval, lamch )
|
||||
GENTFUNC( scomplex, float, c, s, machval, lamch )
|
||||
GENTFUNC( dcomplex, double, z, d, machval, lamch )
|
||||
GENTFUNC1R( float, float, s, s, machval, lamch )
|
||||
GENTFUNC1R( double, double, d, d, machval, lamch )
|
||||
GENTFUNC1R( scomplex, float, c, s, machval, lamch )
|
||||
GENTFUNC1R( dcomplex, double, z, d, machval, lamch )
|
||||
|
||||
|
||||
@@ -32,10 +32,14 @@
|
||||
|
||||
*/
|
||||
|
||||
#include "bl2_lsame.h"
|
||||
#include "bl2_slamch.h"
|
||||
#include "bl2_dlamch.h"
|
||||
|
||||
//
|
||||
// Prototype object-based interface.
|
||||
//
|
||||
void bl2_machval( machval_t machval,
|
||||
void bl2_machval( machval_t mval,
|
||||
obj_t* v );
|
||||
|
||||
|
||||
@@ -46,8 +50,8 @@ void bl2_machval( machval_t machval,
|
||||
#define GENTPROT( ctype, ch, opname ) \
|
||||
\
|
||||
void PASTEMAC(ch,opname)( \
|
||||
machval_t machval, \
|
||||
ctype* val \
|
||||
machval_t mval, \
|
||||
void* v \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT_BASIC( machval )
|
||||
|
||||
@@ -49,8 +49,8 @@ void bl2_param_map_blis_to_netlib_trans( trans_t trans, char* blas_trans )
|
||||
|
||||
void bl2_param_map_blis_to_netlib_uplo( uplo_t uplo, char* blas_uplo )
|
||||
{
|
||||
if ( uplo == BLIS_LOWER_TRIANGULAR ) *blas_uplo = 'L';
|
||||
else if ( uplo == BLIS_UPPER_TRIANGULAR ) *blas_uplo = 'U';
|
||||
if ( uplo == BLIS_LOWER ) *blas_uplo = 'L';
|
||||
else if ( uplo == BLIS_UPPER ) *blas_uplo = 'U';
|
||||
else
|
||||
{
|
||||
bl2_check_error_code( BLIS_INVALID_UPLO );
|
||||
@@ -111,8 +111,8 @@ void bl2_param_map_netlib_to_blis_trans( char* trans, trans_t* blis_trans )
|
||||
|
||||
void bl2_param_map_netlib_to_blis_uplo( char* uplo, uplo_t* blis_uplo )
|
||||
{
|
||||
if ( *uplo == 'l' || *uplo == 'L' ) *blis_uplo = BLIS_LOWER_TRIANGULAR;
|
||||
else if ( *uplo == 'u' || *uplo == 'U' ) *blis_uplo = BLIS_UPPER_TRIANGULAR;
|
||||
if ( *uplo == 'l' || *uplo == 'L' ) *blis_uplo = BLIS_LOWER;
|
||||
else if ( *uplo == 'u' || *uplo == 'U' ) *blis_uplo = BLIS_UPPER;
|
||||
else
|
||||
{
|
||||
bl2_check_error_code( BLIS_INVALID_UPLO );
|
||||
@@ -156,8 +156,8 @@ void bl2_param_map_char_to_blis_trans( char* trans, trans_t* blis_trans )
|
||||
|
||||
void bl2_param_map_char_to_blis_uplo( char* uplo, uplo_t* blis_uplo )
|
||||
{
|
||||
if ( *uplo == 'l' || *uplo == 'L' ) *blis_uplo = BLIS_LOWER_TRIANGULAR;
|
||||
else if ( *uplo == 'u' || *uplo == 'U' ) *blis_uplo = BLIS_UPPER_TRIANGULAR;
|
||||
if ( *uplo == 'l' || *uplo == 'L' ) *blis_uplo = BLIS_LOWER;
|
||||
else if ( *uplo == 'u' || *uplo == 'U' ) *blis_uplo = BLIS_UPPER;
|
||||
else
|
||||
{
|
||||
bl2_check_error_code( BLIS_INVALID_UPLO );
|
||||
|
||||
@@ -9,6 +9,8 @@ extern "C" {
|
||||
#include "bl2_f2c.h"
|
||||
#include "stdio.h"
|
||||
|
||||
double bl2_pow_di( doublereal* a, integer* n );
|
||||
|
||||
/* Table of constant values */
|
||||
|
||||
//static integer c__1 = 1;
|
||||
|
||||
35
frame/base/noopt/bl2_dlamch.h
Normal file
35
frame/base/noopt/bl2_dlamch.h
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2012, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
doublereal bl2_dlamch( char* cmach, ftnlen cmach_len );
|
||||
35
frame/base/noopt/bl2_lsame.h
Normal file
35
frame/base/noopt/bl2_lsame.h
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2012, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
logical bl2_lsame( char* ca, char* cb, ftnlen ca_len, ftnlen cb_len );
|
||||
@@ -9,6 +9,8 @@ extern "C" {
|
||||
#include "bl2_f2c.h"
|
||||
#include "stdio.h"
|
||||
|
||||
double bl2_pow_ri( real* a, integer* n );
|
||||
|
||||
/* Table of constant values */
|
||||
|
||||
//static integer c__1 = 1;
|
||||
|
||||
35
frame/base/noopt/bl2_slamch.h
Normal file
35
frame/base/noopt/bl2_slamch.h
Normal file
@@ -0,0 +1,35 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2012, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
real bl2_slamch( char* cmach, ftnlen cmach_len );
|
||||
@@ -368,7 +368,7 @@ typedef enum
|
||||
|
||||
typedef enum
|
||||
{
|
||||
BLIS_MACH_EPS,
|
||||
BLIS_MACH_EPS = 0,
|
||||
BLIS_MACH_SFMIN,
|
||||
BLIS_MACH_BASE,
|
||||
BLIS_MACH_PREC,
|
||||
|
||||
@@ -62,11 +62,8 @@ extern "C" {
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
// Handle the results of checking for time.h and sys/time.h.
|
||||
// gettimeofday() needs this.
|
||||
#if HAVE_SYS_TIME_H
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include <sys/time.h>
|
||||
#include <time.h>
|
||||
|
||||
|
||||
@@ -79,6 +76,8 @@ extern "C" {
|
||||
|
||||
#include "bl2_extern_defs.h"
|
||||
|
||||
#include "bl2_f2c.h"
|
||||
|
||||
|
||||
// -- Base operation prototypes --
|
||||
|
||||
|
||||
Reference in New Issue
Block a user