Added build system and continued reorganization.

Details:
- Added/renamed packm, unpackm kernels.
- Added machine value routines.
- Added param_map facility.
- Renamed AUTHORS to CREDITS.
- Added Makefile; continued to expand upon existing configure script.
- #define fuse_fac macros in operation headers if not defined already
  (by the user in bl2_kernel.h).
This commit is contained in:
Field G. Van Zee
2012-12-04 19:22:14 -06:00
parent 00f3498a89
commit 2f272b40f4
50 changed files with 740 additions and 564 deletions

28
.gitignore vendored
View File

@@ -1,30 +1,34 @@
# generic files to ignore
# -- generic files to ignore --
*~ # emacs backup files
*.swp # vim swap file
*.out # anything
# emacs backup files
*~
# vim swap files
*.swp
# compiler-related
# -- compiler-related --
*.o # object files
*.a # static library archives
*.x # test executables
# object files
*.o
# static library archives
*.a
# test executables
*.x
# build system files
# -- build system files --
config.mk
# directories with build products
# -- directories with build products --
lib
obj
# makefile fragments
# -- makefile fragments --
.fragment.mk
fragment.mk
# misc.
# -- misc. --
output*.m

View File

@@ -18,8 +18,8 @@ conceived, designed, and developed, including:
Rhys Ulerich (The University of Texas at Austin)
Robert van de Geijn (The University of Texas at Austin)
Thanks go the following individuals for helping port very early
versions of BLIS to new architectures as proofs-of-concept:
Thanks go to the following individuals for porting very early versions
of BLIS to new architectures as proofs-of-concept:
Francisco Igual (The University of Texas at Austin)
Tyler Smith (The University of Texas at Austin)

174
Makefile
View File

@@ -77,8 +77,12 @@ LIB_DIR := lib
NOOPT_DIR := noopt
# Construct a path to the framework source tree.
NOOPT_TEXT := "(NOTE: optimizations disabled)"
# Construct some paths.
FRAME_PATH := ./$(FRAME_DIR)
OBJ_PATH := ./$(OBJ_DIR)
LIB_PATH := ./$(LIB_DIR)
@@ -106,9 +110,9 @@ endif
CONFIG_PATH := ./$(CONFIG_DIR)/$(CONFIG_NAME)
# Construct base paths for the object file tree.
BASE_OBJ_DIR := ./$(OBJ_DIR)/$(CONFIG_NAME)
BASE_OBJ_CONFIG_PATH := $(BASE_OBJ_DIR)/$(CONFIG_DIR)
BASE_OBJ_FRAME_PATH := $(BASE_OBJ_DIR)/$(FRAME_DIR)
BASE_OBJ_PATH := ./$(OBJ_DIR)/$(CONFIG_NAME)
BASE_OBJ_CONFIG_PATH := $(BASE_OBJ_PATH)/$(CONFIG_DIR)
BASE_OBJ_FRAME_PATH := $(BASE_OBJ_PATH)/$(FRAME_DIR)
# Construct base path for the library.
BASE_LIB_PATH := ./$(LIB_DIR)/$(CONFIG_NAME)
@@ -144,7 +148,7 @@ endif
# Construct the architecture-version string, which will be used to name the
# library upon installation.
VERSION := $(shell cat version)
CONF_VERS := $(CONFIG_NAME)-$(VERSION)
VERS_CONF := $(VERSION)-$(CONFIG_NAME)
# --- Library names ---
@@ -172,33 +176,33 @@ MK_CONFIG_OBJS :=
MK_CONFIG_NOOPT_OBJS :=
# Append the base library path to the library name.
MK_ALL_BLIS_LIB := $(BASE_LIB_DIR)/$(BLIS_LIB_NAME)
MK_ALL_BLIS_LIB := $(BASE_LIB_PATH)/$(BLIS_LIB_NAME)
# --- Define install target names for static libraries ---
MK_BLIS_LIB := $(MK_ALL_BLIS_LIB)
MK_BLIS_LIB_INST := $(patsubst $(BASE_LIB_DIR)/%.a, \
MK_BLIS_LIB_INST := $(patsubst $(BASE_LIB_PATH)/%.a, \
$(INSTALL_PREFIX)/lib/%.a, \
$(MK_BLIS_LIB))
MK_BLIS_LIB_INST_W_CONF_VERS := $(patsubst $(BASE_LIB_DIR)/%.a, \
$(INSTALL_PREFIX)/lib/%-$(CONF_VERS).a, \
MK_BLIS_LIB_INST_W_VERS_CONF := $(patsubst $(BASE_LIB_PATH)/%.a, \
$(INSTALL_PREFIX)/lib/%-$(VERS_CONF).a, \
$(MK_BLIS_LIB))
# --- Determine which libraries to build ---
MK_LIBS :=
MK_LIBS_INST :=
MK_LIBS_INST_W_CONF_VERS :=
MK_LIBS_INST_W_VERS_CONF :=
ifeq ($(BLIS_ENABLE_STATIC_BUILD),yes)
MK_LIBS += $(MK_BLIS_LIB)
MK_LIBS_INST += $(MK_BLIS_LIB_INST)
MK_LIBS_INST_W_CONF_VERS += $(MK_BLIS_LIB_INST_W_CONF_VERS)
MK_LIBS_INST_W_VERS_CONF += $(MK_BLIS_LIB_INST_W_VERS_CONF)
endif
# Set the include directory names
MK_INCL_DIR_INST := $(INSTALL_PREFIX)/include
MK_INCL_DIR_INST_W_CONF_VERS := $(INSTALL_PREFIX)/include-$(CONF_VERS)
MK_INCL_DIR_INST_W_VERS_CONF := $(INSTALL_PREFIX)/include-$(VERS_CONF)
@@ -212,16 +216,21 @@ MK_INCL_DIR_INST_W_CONF_VERS := $(INSTALL_PREFIX)/include-$(CONF_VERS)
# makefile fragments reside.
FRAGMENT_DIR_PATHS :=
# The only fragment sub-directories that we build from are the config and
# frame directories.
FRAGMENT_SUB_DIRS := $(CONFIG_PATH) $(FRAME_PATH)
# This variable is used by the include statements as they recursively include
# one another. We initialize it to the current directory.
# one another. For the framework source tree, we initialize it to the current
# directory since '.' is its parent.
PARENT_PATH := .
# Recursively include all the makefile fragments.
-include $(addsuffix /$(FRAGMENT_MK), $(FRAGMENT_SUB_DIRS))
# Recursively include all the makefile fragments in the framework itself.
-include $(addsuffix /$(FRAGMENT_MK), $(FRAME_PATH))
# Now set PARENT_PATH to ./config in preparation to include the fragments in
# the configuration sub-directory.
PARENT_PATH := ./$(CONFIG_DIR)
# Recursively include all the makefile fragments in the configuration
# sub-directory.
-include $(addsuffix /$(FRAGMENT_MK), $(CONFIG_PATH))
# Create a list of the makefile fragments.
MAKEFILE_FRAGMENTS := $(addsuffix /$(FRAGMENT_MK), $(FRAGMENT_DIR_PATHS))
@@ -317,17 +326,17 @@ check: check-make-defs check-fragments check-config
check-config:
ifeq ($(CONFIG_MK_PRESENT),no)
$(error Cannot proceed: config.mk not detected! Run configure first)
$(error Cannot proceed: config.mk not detected! Run configure first)
endif
check-fragments: check-config
ifeq ($(MAKEFILE_FRAGMENTS_PRESENT),no)
$(error Cannot proceed: makefile fragments not detected! Run configure first)
$(error Cannot proceed: makefile fragments not detected! Run configure first)
endif
check-make-defs: check-fragments
ifeq ($(MAKE_DEFS_MK_PRESENT),no)
$(error Cannot proceed: make_defs.mk not detected! Invalid configuration)
$(error Cannot proceed: make_defs.mk not detected! Invalid configuration)
endif
@@ -337,27 +346,32 @@ endif
# --- General source code / object code rules ---
$(BASE_OBJ_FRAME_DIR)/%.o: $(FRAME_PATH)/%.c $(CONFIG_MK_PATH)
#$(BASE_OBJ_FRAME_PATH)/%.o: $(FRAME_PATH)/%.c $(CONFIG_MK_PATH)
#ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
# $(CC) $(CFLAGS) -c $< -o $@
#else
# @echo "Compiling $<"
# @$(CC) $(CFLAGS) -c $< -o $@
#endif
$(BASE_OBJ_FRAME_PATH)/%.o: $(FRAME_PATH)/%.c $(CONFIG_MK_PATH)
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
$(CC) $(CFLAGS) -c $< -o $@
$(CC) $(if $(findstring $(NOOPT_DIR),$@),$(CFLAGS_NOOPT),$(CFLAGS)) -c $< -o $@
else
ifeq ($(findstring $(NOOPT_DIR),$@),)
@echo "Compiling $<"
@$(CC) $(CFLAGS) -c $< -o $@
else
@echo "Compiling (no opt) $<"
@$(CC) $(CFLAGS_NOOPT) -c $< -o $@
endif
@echo "Compiling $<" $(if $(findstring $(NOOPT_DIR),$@),$(NOOPT_TEXT),)
@$(CC) $(if $(findstring $(NOOPT_DIR),$@),$(CFLAGS_NOOPT),$(CFLAGS)) -c $< -o $@
endif
$(BASE_OBJ_CONFIG_DIR)/%.o: $(CONFIG_PATH)/%.c $(CONFIG_MK_PATH)
$(BASE_OBJ_CONFIG_PATH)/%.o: $(CONFIG_PATH)/%.c $(CONFIG_MK_PATH)
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
$(CC) $(CFLAGS) -c $< -o $@
$(CC) $(if $(findstring $(NOOPT_DIR),$@),$(CFLAGS_NOOPT),$(CFLAGS)) -c $< -o $@
else
@echo "Compiling $<"
@$(CC) $(CFLAGS) -c $< -o $@
@echo "Compiling $<" $(if $(findstring $(NOOPT_DIR),$@),$(NOOPT_TEXT),)
@$(CC) $(if $(findstring $(NOOPT_DIR),$@),$(CFLAGS_NOOPT),$(CFLAGS)) -c $< -o $@
endif
#ifeq ($(findstring $(NOOPT_DIR),$@),)
# --- Static library archiver rules ---
@@ -374,28 +388,28 @@ endif
# --- Install rules ---
install-libs: check $(MK_LIBS_INST_W_CONF_VERS)
install-libs: check $(MK_LIBS_INST_W_VERS_CONF)
install-headers: check $(MK_INCL_DIR_INST_W_CONF_VERS)
install-headers: check $(MK_INCL_DIR_INST_W_VERS_CONF)
$(MK_INCL_DIR_INST_W_CONF_VERS): $(MK_HEADER_FILES)
$(MK_INCL_DIR_INST_W_VERS_CONF): $(MK_HEADER_FILES)
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
$(INSTALL) -m 0755 -d $(@)
$(INSTALL) -m 0644 $(MK_HEADER_FILES) $(@)
$(INSTALL) -m 0755 -d $(@)
$(INSTALL) -m 0644 $(MK_HEADER_FILES) $(@)
else
@$(INSTALL) -m 0755 -d $(@)
@echo "Installing C header files into $(@)"
@$(INSTALL) -m 0644 $(MK_HEADER_FILES) $(@)
@$(INSTALL) -m 0755 -d $(@)
@echo "Installing C header files into $(@)"
@$(INSTALL) -m 0644 $(MK_HEADER_FILES) $(@)
endif
$(INSTALL_PREFIX)/lib/%-$(CONF_VERS).a: $(BASE_LIB_DIR)/%.a
$(INSTALL_PREFIX)/lib/%-$(VERS_CONF).a: $(BASE_LIB_PATH)/%.a
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
$(INSTALL) -m 0755 -d $(@D)
$(INSTALL) -m 0644 $< $@
$(INSTALL) -m 0755 -d $(@D)
$(INSTALL) -m 0644 $< $@
else
@echo "Installing $(@F) into $(INSTALL_PREFIX)/lib/"
@$(INSTALL) -m 0755 -d $(@D)
@$(INSTALL) -m 0644 $< $@
@echo "Installing $(@F) into $(INSTALL_PREFIX)/lib/"
@$(INSTALL) -m 0755 -d $(@D)
@$(INSTALL) -m 0644 $< $@
endif
@@ -405,40 +419,62 @@ install-lib-symlinks: check $(MK_LIBS_INST)
install-header-symlinks: check $(MK_INCL_DIR_INST)
$(MK_INCL_DIR_INST): $(MK_INCL_DIR_INST_W_CONF_VERS)
$(MK_INCL_DIR_INST): $(MK_INCL_DIR_INST_W_VERS_CONF)
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
$(SYMLINK) $(<F) $(@F)
$(MV) $(@F) $(INSTALL_PREFIX)
$(SYMLINK) $(<F) $(@F)
$(MV) $(@F) $(INSTALL_PREFIX)
else
@echo "Installing symlink $(@F) into $(INSTALL_PREFIX)/"
@$(SYMLINK) $(<F) $(@F)
@$(MV) $(@F) $(INSTALL_PREFIX)
@echo "Installing symlink $(@F) into $(INSTALL_PREFIX)/"
@$(SYMLINK) $(<F) $(@F)
@$(MV) $(@F) $(INSTALL_PREFIX)
endif
$(INSTALL_PREFIX)/lib/%.a: $(INSTALL_PREFIX)/lib/%-$(CONF_VERS).a
$(INSTALL_PREFIX)/lib/%.a: $(INSTALL_PREFIX)/lib/%-$(VERS_CONF).a
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
$(SYMLINK) $(<F) $(@F)
$(MV) $(@F) $(INSTALL_PREFIX)/lib/
$(SYMLINK) $(<F) $(@F)
$(MV) $(@F) $(INSTALL_PREFIX)/lib/
else
@echo "Installing symlink $(@F) into $(INSTALL_PREFIX)/lib/"
@$(SYMLINK) $(<F) $(@F)
@$(MV) $(@F) $(INSTALL_PREFIX)/lib/
@echo "Installing symlink $(@F) into $(INSTALL_PREFIX)/lib/"
@$(SYMLINK) $(<F) $(@F)
@$(MV) $(@F) $(INSTALL_PREFIX)/lib/
endif
# --- Clean rules ---
cleanmost: check
- $(FIND) $(BASE_OBJ_DIR) -name "*.o" | $(XARGS) $(RM_F)
- $(FIND) $(BASE_LIB_DIR) -name "*.a" | $(XARGS) $(RM_F)
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
- $(FIND) $(BASE_OBJ_PATH) -name "*.o" | $(XARGS) $(RM_F)
- $(FIND) $(BASE_LIB_PATH) -name "*.a" | $(XARGS) $(RM_F)
else
@echo "Removing .o files from $(BASE_OBJ_PATH)."
@- $(FIND) $(BASE_OBJ_PATH) -name "*.o" | $(XARGS) $(RM_F)
@echo "Removing .a files from $(BASE_LIB_PATH)."
@- $(FIND) $(BASE_LIB_PATH) -name "*.a" | $(XARGS) $(RM_F)
endif
distclean: check cleanmost cleanmk
distclean: check cleanmk cleanmost
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
- $(RM_F) $(CONFIG_MK_PATH)
- $(RM_RF) $(OBJ_DIR)
- $(RM_RF) $(LIB_DIR)
- $(RM_RF) $(OBJ_PATH)
- $(RM_RF) $(LIB_PATH)
else
@echo "Removing $(CONFIG_MK_PATH)."
@- $(RM_F) $(CONFIG_MK_PATH)
@echo "Removing $(OBJ_PATH)."
@- $(RM_RF) $(OBJ_PATH)
@echo "Removing $(LIB_PATH)."
@- $(RM_RF) $(LIB_PATH)
endif
cleanmk: check
- $(FIND) $(CONFIG_PATH) -name "$(FRAGMENT_MK)" | $(XARGS) $(RM_F)
- $(FIND) $(FRAME_PATH) -name "$(FRAGMENT_MK)" | $(XARGS) $(RM_F)
ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes)
- $(FIND) $(CONFIG_PATH) -name "$(FRAGMENT_MK)" | $(XARGS) $(RM_F)
- $(FIND) $(FRAME_PATH) -name "$(FRAGMENT_MK)" | $(XARGS) $(RM_F)
else
@echo "Removing makefile fragments from $(CONFIG_PATH)."
@- $(FIND) $(CONFIG_PATH) -name "$(FRAGMENT_MK)" | $(XARGS) $(RM_F)
@echo "Removing makefile fragments from $(FRAME_PATH)."
@- $(FIND) $(FRAME_PATH) -name "$(FRAGMENT_MK)" | $(XARGS) $(RM_F)
endif

View File

@@ -53,6 +53,24 @@
// -- LEVEL-1M KERNEL DEFINITIONS ----------------------------------------------
// -- packm --
#define PACKM_2XK_KERNEL packm_ref_2xk
#define PACKM_4XK_KERNEL packm_ref_4xk
#define PACKM_6XK_KERNEL packm_ref_6xk
#define PACKM_8XK_KERNEL packm_ref_8xk
// -- unpackm --
#define UNPACKM_2XK_KERNEL unpackm_ref_2xk
#define UNPACKM_4XK_KERNEL unpackm_ref_4xk
#define UNPACKM_6XK_KERNEL unpackm_ref_6xk
#define UNPACKM_8XK_KERNEL unpackm_ref_8xk
// -- LEVEL-1F KERNEL DEFINITIONS ----------------------------------------------
// -- axpy2v --

View File

@@ -72,10 +72,10 @@ INSTALL := install -c
# --- Determine the C compiler and related flags ---
CC := gcc
CPPROCFLAGS :=
CPPROCFLAGS :=
CMISCFLAGS := -std=c99 # -fopenmp -pg
CDBGFLAGS := -g
CWARNFLAGS := -Wall # -Wno-comment
CWARNFLAGS := -Wall
COPTFLAGS := -O2 -malign-double
CVECFLAGS := -msse3 -march=native # -mfpmath=sse

49
configure vendored
View File

@@ -36,7 +36,9 @@ print_usage()
{
# Echo usage info.
echo " "
echo " ${script_name}"
echo " ${script_name} (BLIS ${version})"
#echo " "
#echo " BLIS ${version}"
echo " "
echo " Field G. Van Zee"
echo " "
@@ -58,7 +60,9 @@ print_usage()
echo " "
echo " -p PREFIX install prefix"
echo " The path to which make will install buid products."
echo " If not given, PREFIX defaults to \$(HOME)/blis."
echo " If not given, PREFIX defaults to \$(HOME)/blis. If"
echo " the path refers to a directory that does not exist,"
echo " it will be created."
echo " "
echo " -q quiet"
echo " Suppress informational output. By default, configure"
@@ -75,7 +79,13 @@ main()
script_name=${0##*/}
# Option variables.
quiet_flag=""
quiet_flag=''
# The file in which the version string is kept.
version_file='version'
# The contents of version_file.
version=''
# The name of the build directory. This is where the template
# config.mk.in file lives.
@@ -104,12 +114,19 @@ main()
# The name of the directory in which object files will be kept.
obj_dir='obj'
# The name of the directory in which libraries will be kept.
lib_dir='lib'
# The install prefix flag.
install_prefix_def="${HOME}/blis"
install_prefix=''
prefix_flag=''
# Query which version of BLIS this is.
version=$(cat "${version_file}")
# Process our command line options.
while getopts ":hp:q" opt; do
case $opt in
@@ -123,6 +140,10 @@ main()
shift $(($OPTIND - 1))
# Initial message.
echo "${script_name}: starting configuration of BLIS ${version}."
# Set config_name based on the number of arguments leftover (after command
# line option processing).
if [ $# = "0" ]; then
@@ -140,7 +161,7 @@ main()
fi
# Build the config directory path
config_path="${config_dir}/${config_name}"
config_path="./${config_dir}/${config_name}"
# Set the install prefix if it was not already set when parsing the install
@@ -168,7 +189,7 @@ main()
# Create obj sub-directories (if they do not already exist).
base_obj_path="${obj_dir}/${config_name}"
base_obj_path="./${obj_dir}/${config_name}"
echo "${script_name}: creating ${base_obj_path}"
mkdir -p ${base_obj_path}
@@ -184,19 +205,25 @@ main()
mkdir -p ${obj_frame_path}
# Mirror source trees to the object directory.
echo "${script_name}: mirroring ${config_path} to ${obj_config_path}"
# Create lib directory (if it does not already exist).
base_lib_path="./${lib_dir}/${config_name}"
echo "${script_name}: creating ${base_lib_path}"
mkdir -p ${base_lib_path}
# Mirror config source tree to the object config sub-directory.
echo "${script_name}: mirroring ${config_path} to ${obj_config_path}"
${mirror_tree_sh} ${config_path} ${obj_config_path}
# Mirror frame source tree to the object frame sub-directory.
echo "${script_name}: mirroring ${frame_path} to ${obj_frame_path}"
${mirror_tree_sh} ${frame_path} ${obj_frame_path}
exit 1
# Generate makefile fragments in the chosen configuration directory.
./build/gen-make-frags/gen-make-frag.sh \
-h -r -d -v1 \
-h -r -v1 \
-o ${script_name} \
-p 'CONFIG' \
${config_path} \
@@ -207,7 +234,7 @@ exit 1
# Generate makefile fragments in the framework directory.
./build/gen-make-frags/gen-make-frag.sh \
-h -r -d -v1 \
-h -r -v1 \
-o ${script_name} \
-p 'FRAME' \
${frame_path} \

View File

@@ -62,7 +62,7 @@ void PASTEMAC(ch,opname)( \
y, incy ); \
}
INSERT_GENTFUNC_BASIC( axpyf, axpyf_opt_var1 )
INSERT_GENTFUNC_BASIC( axpyf, AXPYF_KERNEL )
//
@@ -94,13 +94,13 @@ void PASTEMAC3(cha,chx,chy,opname)( \
// Define the basic set of functions unconditionally, and then also some
// mixed datatype functions if requested.
INSERT_GENTFUNC3U12_BASIC( axpyf, axpyf_opt_var1 )
INSERT_GENTFUNC3U12_BASIC( axpyf, AXPYF_KERNEL )
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
INSERT_GENTFUNC3U12_MIX_D( axpyf, axpyf_opt_var1 )
INSERT_GENTFUNC3U12_MIX_D( axpyf, AXPYF_KERNEL )
#endif
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
INSERT_GENTFUNC3U12_MIX_P( axpyf, axpyf_opt_var1 )
INSERT_GENTFUNC3U12_MIX_P( axpyf, AXPYF_KERNEL )
#endif

View File

@@ -35,6 +35,24 @@
#include "bl2_axpyf_unb_var1.h"
//
// Define fusing factors (if they are not already defined by the user
// in bl2_kernel.h).
//
#ifndef bl2_saxpyf_fuse_fac
#define bl2_saxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_S
#endif
#ifndef bl2_daxpyf_fuse_fac
#define bl2_daxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_D
#endif
#ifndef bl2_caxpyf_fuse_fac
#define bl2_caxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_C
#endif
#ifndef bl2_zaxpyf_fuse_fac
#define bl2_zaxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_Z
#endif
//
// Prototype BLAS-like interfaces with homogeneous-typed operands.
//

View File

@@ -72,7 +72,7 @@ void PASTEMAC(ch,opname)( \
z, incz ); \
}
INSERT_GENTFUNC_BASIC( dotxaxpyf, dotxaxpyf_opt_var1 )
INSERT_GENTFUNC_BASIC( dotxaxpyf, DOTXAXPYF_KERNEL )
//
@@ -114,13 +114,13 @@ void PASTEMAC3(cha,chb,chc,opname)( \
// Define the basic set of functions unconditionally, and then also some
// mixed datatype functions if requested.
INSERT_GENTFUNC3U12_BASIC( dotxaxpyf, dotxaxpyf_opt_var1 )
INSERT_GENTFUNC3U12_BASIC( dotxaxpyf, DOTXAXPYF_KERNEL )
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
INSERT_GENTFUNC3U12_MIX_D( dotxaxpyf, dotxaxpyf_opt_var1 )
INSERT_GENTFUNC3U12_MIX_D( dotxaxpyf, DOTXAXPYF_KERNEL )
#endif
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
INSERT_GENTFUNC3U12_MIX_P( dotxaxpyf, dotxaxpyf_opt_var1 )
INSERT_GENTFUNC3U12_MIX_P( dotxaxpyf, DOTXAXPYF_KERNEL )
#endif

View File

@@ -35,6 +35,24 @@
#include "bl2_dotxaxpyf_unb_var1.h"
//
// Define fusing factors (if they are not already defined by the user
// in bl2_kernel.h).
//
#ifndef bl2_sdotxaxpyf_fuse_fac
#define bl2_sdotxaxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_S
#endif
#ifndef bl2_ddotxaxpyf_fuse_fac
#define bl2_ddotxaxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_D
#endif
#ifndef bl2_cdotxaxpyf_fuse_fac
#define bl2_cdotxaxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_C
#endif
#ifndef bl2_zdotxaxpyf_fuse_fac
#define bl2_zdotxaxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_Z
#endif
//
// Prototype BLAS-like interfaces with homogeneous-typed operands.
//

View File

@@ -35,6 +35,24 @@
#include "bl2_dotxf_unb_var1.h"
//
// Define fusing factors (if they are not already defined by the user
// in bl2_kernel.h).
//
#ifndef bl2_sdotxf_fuse_fac
#define bl2_sdotxf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_S
#endif
#ifndef bl2_ddotxf_fuse_fac
#define bl2_ddotxf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_D
#endif
#ifndef bl2_cdotxf_fuse_fac
#define bl2_cdotxf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_C
#endif
#ifndef bl2_zdotxf_fuse_fac
#define bl2_zdotxf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_Z
#endif
//
// Prototype BLAS-like interfaces with homogeneous-typed operands.
//

View File

@@ -44,94 +44,102 @@ typedef void (*FUNCPTR_T)(
void* p
);
static FUNCPTR_T ftypes[10][BLIS_NUM_FP_TYPES] =
{
// panel width = 0
{
NULL,
NULL,
NULL,
NULL,
},
// panel width = 1
{
NULL,
NULL,
NULL,
NULL,
},
// panel width = 2
{
PASTEMAC(s,packm_2xk),
PASTEMAC(c,packm_2xk),
PASTEMAC(d,packm_2xk),
PASTEMAC(z,packm_2xk),
},
// panel width = 3
{
NULL,
NULL,
NULL,
NULL,
},
// panel width = 4
{
PASTEMAC(s,packm_4xk),
PASTEMAC(c,packm_4xk),
PASTEMAC(d,packm_4xk),
PASTEMAC(z,packm_4xk),
},
// panel width = 5
{
NULL,
NULL,
NULL,
NULL,
},
// panel width = 6
{
NULL,
NULL,
NULL,
NULL,
},
// panel width = 7
{
NULL,
NULL,
NULL,
NULL,
},
// panel width = 8
{
NULL,
NULL,
NULL,
NULL,
},
// panel width = 9
{
NULL,
NULL,
NULL,
NULL,
}
// GENARRAY defines the static table ftypes of packm kernel function
// pointers. The table has ten rows, one per panel width 0 through 9, and
// BLIS_NUM_FP_TYPES columns; it is later indexed as ftypes[panel_dim][dt].
// Only the rows for widths 2, 4, 6 and 8 hold kernels; every other entry is
// NULL. Each non-NULL entry is built by PASTEMAC from a datatype character
// (s, c, d, z -- in that column order) and the kernel name passed in as the
// corresponding macro argument. The invocation below supplies the names
// through the PACKM_*XK_KERNEL macros.
#undef GENARRAY
#define GENARRAY( kername2, kername4, kername6, kername8 ) \
\
static FUNCPTR_T ftypes[10][BLIS_NUM_FP_TYPES] = \
{ \
/* panel width = 0 */ \
{ \
NULL, \
NULL, \
NULL, \
NULL, \
}, \
/* panel width = 1 */ \
{ \
NULL, \
NULL, \
NULL, \
NULL, \
}, \
/* panel width = 2 */ \
{ \
PASTEMAC(s,kername2), \
PASTEMAC(c,kername2), \
PASTEMAC(d,kername2), \
PASTEMAC(z,kername2), \
}, \
/* panel width = 3 */ \
{ \
NULL, \
NULL, \
NULL, \
NULL, \
}, \
/* panel width = 4 */ \
{ \
PASTEMAC(s,kername4), \
PASTEMAC(c,kername4), \
PASTEMAC(d,kername4), \
PASTEMAC(z,kername4), \
}, \
/* panel width = 5 */ \
{ \
NULL, \
NULL, \
NULL, \
NULL, \
}, \
/* panel width = 6 */ \
{ \
PASTEMAC(s,kername6), \
PASTEMAC(c,kername6), \
PASTEMAC(d,kername6), \
PASTEMAC(z,kername6), \
}, \
/* panel width = 7 */ \
{ \
NULL, \
NULL, \
NULL, \
NULL, \
}, \
/* panel width = 8 */ \
{ \
PASTEMAC(s,kername8), \
PASTEMAC(c,kername8), \
PASTEMAC(d,kername8), \
PASTEMAC(z,kername8), \
}, \
/* panel width = 9 */ \
{ \
NULL, \
NULL, \
NULL, \
NULL, \
} \
};
GENARRAY( PACKM_2XK_KERNEL,
PACKM_4XK_KERNEL,
PACKM_6XK_KERNEL,
PACKM_8XK_KERNEL )
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, varname ) \
#define GENTFUNC( ctype, ch, opname, copyvker ) \
\
void PASTEMAC(ch,varname)( \
conj_t conja, \
dim_t m, \
dim_t n, \
void* beta, \
void* a, inc_t inca, inc_t lda, \
void* p, inc_t ldp \
) \
void PASTEMAC(ch,opname)( \
conj_t conja, \
dim_t m, \
dim_t n, \
void* beta, \
void* a, inc_t inca, inc_t lda, \
void* p, inc_t ldp \
) \
{ \
dim_t panel_dim; \
num_t dt; \
@@ -145,6 +153,17 @@ void PASTEMAC(ch,varname)( \
\
/* Index into the array to extract the correct function pointer. */ \
f = ftypes[panel_dim][dt]; \
\
/* If the panel dimension is unit, then we recognize that this allows
the kernel to reduce to a copyv, so we call that kernel directly. */ \
if ( m == 1 ) \
{ \
PASTEMAC2(ch,ch,copyvker)( conja, \
n, \
a, lda, \
p, 1 ); \
return; \
} \
\
/* If there exists a kernel implementation for the panel dimension
provided, and the "width" of the panel is equal to the leading
@@ -174,5 +193,5 @@ void PASTEMAC(ch,varname)( \
} \
}
INSERT_GENTFUNC_BASIC( packm_cxk, packm_cxk )
INSERT_GENTFUNC_BASIC( packm_cxk, COPYV_KERNEL )

View File

@@ -32,9 +32,11 @@
*/
// Include headers for various packm micro-kernels.
#include "bl2_packm_2xk.h"
#include "bl2_packm_4xk.h"
// Include headers for various packm kernels.
#include "bl2_packm_ref_2xk.h"
#include "bl2_packm_ref_4xk.h"
#include "bl2_packm_ref_6xk.h"
#include "bl2_packm_ref_8xk.h"
#undef GENTPROT

View File

@@ -42,9 +42,11 @@ void PASTEMAC(ch,varname)( \
dim_t n, \
void* beta, \
void* a, inc_t inca, inc_t lda, \
void* p \
void* p \
) \
{ \
const inc_t ldp = 2; \
\
ctype* restrict beta_cast = beta; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
@@ -59,7 +61,7 @@ void PASTEMAC(ch,varname)( \
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
\
alpha1 += lda; \
pi1 += 2; \
pi1 += ldp; \
} \
} \
else \
@@ -70,7 +72,7 @@ void PASTEMAC(ch,varname)( \
PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
\
alpha1 += lda; \
pi1 += 2; \
pi1 += ldp; \
} \
} \
} \
@@ -84,7 +86,7 @@ void PASTEMAC(ch,varname)( \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
\
alpha1 += lda; \
pi1 += 2; \
pi1 += ldp; \
} \
} \
else \
@@ -95,11 +97,11 @@ void PASTEMAC(ch,varname)( \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
\
alpha1 += lda; \
pi1 += 2; \
pi1 += ldp; \
} \
} \
} \
}
INSERT_GENTFUNC_BASIC( packm_2xk, packm_2xk )
INSERT_GENTFUNC_BASIC( packm_ref_6xk, packm_ref_6xk )

View File

@@ -43,4 +43,4 @@ void PASTEMAC(ch,varname)( \
void* p \
);
INSERT_GENTPROT_BASIC( packm_2xk )
INSERT_GENTPROT_BASIC( packm_ref_2xk )

View File

@@ -45,6 +45,8 @@ void PASTEMAC(ch,varname)( \
void* p \
) \
{ \
const inc_t ldp = 4; \
\
ctype* restrict beta_cast = beta; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
@@ -61,7 +63,7 @@ void PASTEMAC(ch,varname)( \
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
\
alpha1 += lda; \
pi1 += 4; \
pi1 += ldp; \
} \
} \
else \
@@ -74,7 +76,7 @@ void PASTEMAC(ch,varname)( \
PASTEMAC2(ch,ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
\
alpha1 += lda; \
pi1 += 4; \
pi1 += ldp; \
} \
} \
} \
@@ -90,7 +92,7 @@ void PASTEMAC(ch,varname)( \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
\
alpha1 += lda; \
pi1 += 4; \
pi1 += ldp; \
} \
} \
else \
@@ -103,11 +105,11 @@ void PASTEMAC(ch,varname)( \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
\
alpha1 += lda; \
pi1 += 4; \
pi1 += ldp; \
} \
} \
} \
}
INSERT_GENTFUNC_BASIC( packm_4xk, packm_4xk )
INSERT_GENTFUNC_BASIC( packm_ref_6xk, packm_ref_6xk )

View File

@@ -43,4 +43,4 @@ void PASTEMAC(ch,varname)( \
void* p \
);
INSERT_GENTPROT_BASIC( packm_2xk )
INSERT_GENTPROT_BASIC( packm_ref_4xk )

View File

@@ -45,6 +45,8 @@ void PASTEMAC(ch,varname)( \
void* p \
) \
{ \
const inc_t ldp = 6; \
\
ctype* restrict beta_cast = beta; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
@@ -59,9 +61,11 @@ void PASTEMAC(ch,varname)( \
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
\
alpha1 += lda; \
pi1 += 4; \
pi1 += ldp; \
} \
} \
else \
@@ -72,9 +76,11 @@ void PASTEMAC(ch,varname)( \
PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC2(ch,ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC2(ch,ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC2(ch,ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC2(ch,ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
\
alpha1 += lda; \
pi1 += 4; \
pi1 += ldp; \
} \
} \
} \
@@ -88,9 +94,11 @@ void PASTEMAC(ch,varname)( \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
\
alpha1 += lda; \
pi1 += 4; \
pi1 += ldp; \
} \
} \
else \
@@ -101,13 +109,15 @@ void PASTEMAC(ch,varname)( \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
\
alpha1 += lda; \
pi1 += 4; \
pi1 += ldp; \
} \
} \
} \
}
INSERT_GENTFUNC_BASIC( packm_4xk, packm_4xk )
INSERT_GENTFUNC_BASIC( packm_ref_6xk, packm_ref_6xk )

View File

@@ -43,4 +43,4 @@ void PASTEMAC(ch,varname)( \
void* p \
);
INSERT_GENTPROT_BASIC( packm_4xk )
INSERT_GENTPROT_BASIC( packm_ref_6xk )

View File

@@ -42,9 +42,11 @@ void PASTEMAC(ch,varname)( \
dim_t n, \
void* beta, \
void* a, inc_t inca, inc_t lda, \
void* p \
void* p \
) \
{ \
const inc_t ldp = 8; \
\
ctype* restrict beta_cast = beta; \
ctype* restrict alpha1 = a; \
ctype* restrict pi1 = p; \
@@ -57,9 +59,15 @@ void PASTEMAC(ch,varname)( \
{ \
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC2(ch,ch,copyjs)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
\
alpha1 += lda; \
pi1 += 2; \
pi1 += ldp; \
} \
} \
else \
@@ -68,9 +76,15 @@ void PASTEMAC(ch,varname)( \
{ \
PASTEMAC2(ch,ch,copys)( *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC2(ch,ch,copys)( *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC2(ch,ch,copys)( *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC2(ch,ch,copys)( *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC2(ch,ch,copys)( *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC2(ch,ch,copys)( *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC2(ch,ch,copys)( *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC2(ch,ch,copys)( *(alpha1 + 7*inca), *(pi1 + 7) ); \
\
alpha1 += lda; \
pi1 += 2; \
pi1 += ldp; \
} \
} \
} \
@@ -82,9 +96,15 @@ void PASTEMAC(ch,varname)( \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
\
alpha1 += lda; \
pi1 += 2; \
pi1 += ldp; \
} \
} \
else \
@@ -93,13 +113,19 @@ void PASTEMAC(ch,varname)( \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 0*inca), *(pi1 + 0) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 1*inca), *(pi1 + 1) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 2*inca), *(pi1 + 2) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 3*inca), *(pi1 + 3) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 4*inca), *(pi1 + 4) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 5*inca), *(pi1 + 5) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 6*inca), *(pi1 + 6) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(alpha1 + 7*inca), *(pi1 + 7) ); \
\
alpha1 += lda; \
pi1 += 2; \
pi1 += ldp; \
} \
} \
} \
}
INSERT_GENTFUNC_BASIC( packm_2xk, packm_2xk )
INSERT_GENTFUNC_BASIC( packm_ref_6xk, packm_ref_6xk )

View File

@@ -43,4 +43,4 @@ void PASTEMAC(ch,varname)( \
void* p \
);
INSERT_GENTPROT_BASIC( packm_4xk )
INSERT_GENTPROT_BASIC( packm_ref_8xk )

View File

@@ -1,97 +0,0 @@
#!/bin/bash
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2012, The University of Texas
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name of The University of Texas nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
#
main()
{
	# Re-point the packm kernel symlinks in the current directory at the
	# implementations that live in the architecture sub-directory named by
	# the first argument.
	#
	# Usage:
	#   Example 1: ./set_symlink.sh generic
	#   Example 2: ./set_symlink.sh x86
	#
	# Exits with status 1 if the architecture directory does not exist (this
	# also covers the case where no argument was given).
	arch_dir="$1"

	if [ ! -d "${arch_dir}" ]; then
		echo "$0: Directory for architecture ${arch_dir} does not exist!. Exiting."
		exit 1
	fi

	# Base names of the kernels whose .c/.h links are managed here.
	kernels="bl2_packm_2xk bl2_packm_4xk"

	for t in $kernels; do

		# Build the filenames of the existing links for the current kernel.
		oldlink_c="${t}.c"
		oldlink_h="${t}.h"

		# Remove the existing links, if any. The -L test is needed in addition
		# to -f because -f follows symlinks: a dangling link would otherwise be
		# reported as missing, left in place, and make the ln -s below fail.
		if [ ! -f "${oldlink_c}" ] && [ ! -L "${oldlink_c}" ]; then
			echo "$0: File ${oldlink_c} does not exist. Skipping removal."
		else
			echo "$0: removing ${oldlink_c}"
			rm -f "${oldlink_c}"
		fi
		if [ ! -f "${oldlink_h}" ] && [ ! -L "${oldlink_h}" ]; then
			echo "$0: File ${oldlink_h} does not exist. Skipping removal."
		else
			echo "$0: removing ${oldlink_h}"
			rm -f "${oldlink_h}"
		fi

		# Build the filenames for the current kernel in the desired
		# architecture directory.
		newfile_c="${arch_dir}/${t}.c"
		newfile_h="${arch_dir}/${t}.h"

		# Verify that the files exist; if so, symlink them into the current
		# directory. Expansions are quoted so that directory names containing
		# whitespace are passed through as a single argument.
		if [ ! -f "${newfile_c}" ]; then
			echo "$0: Attempted to symlink file ${newfile_c}, which does not exist! Skipping."
		else
			echo "$0: symlinking (and touching) ${newfile_c}"
			ln -s "${newfile_c}"
			# Update the source file's timestamp along with the new link.
			touch "${newfile_c}"
		fi
		if [ ! -f "${newfile_h}" ]; then
			echo "$0: Attempted to symlink file ${newfile_h}, which does not exist! Skipping."
		else
			echo "$0: symlinking ${newfile_h}"
			ln -s "${newfile_h}"
		fi

	done
}
main "$@"

View File

@@ -44,94 +44,103 @@ typedef void (*FUNCPTR_T)(
void* a, inc_t inca, inc_t lda
);
static FUNCPTR_T ftypes[10][BLIS_NUM_FP_TYPES] =
{
// panel width = 0
{
NULL,
NULL,
NULL,
NULL,
},
// panel width = 1
{
NULL,
NULL,
NULL,
NULL,
},
// panel width = 2
{
PASTEMAC(s,unpackm_2xk),
PASTEMAC(c,unpackm_2xk),
PASTEMAC(d,unpackm_2xk),
PASTEMAC(z,unpackm_2xk),
},
// panel width = 3
{
NULL,
NULL,
NULL,
NULL,
},
// panel width = 4
{
PASTEMAC(s,unpackm_4xk),
PASTEMAC(c,unpackm_4xk),
PASTEMAC(d,unpackm_4xk),
PASTEMAC(z,unpackm_4xk),
},
// panel width = 5
{
NULL,
NULL,
NULL,
NULL,
},
// panel width = 6
{
NULL,
NULL,
NULL,
NULL,
},
// panel width = 7
{
NULL,
NULL,
NULL,
NULL,
},
// panel width = 8
{
NULL,
NULL,
NULL,
NULL,
},
// panel width = 9
{
NULL,
NULL,
NULL,
NULL,
}
// GENARRAY defines the static function pointer table 'ftypes', which has
// ten rows (one per panel width, 0 through 9) and BLIS_NUM_FP_TYPES
// columns. Only the rows for panel widths 2, 4, 6, and 8 are populated,
// using the s, c, d, and z instances of kername2, kername4, kername6, and
// kername8, respectively; every other entry is NULL.
// NOTE(review): the s, c, d, z ordering within each row presumably mirrors
// the datatype index used to read the table -- confirm.
#undef GENARRAY
#define GENARRAY( kername2, kername4, kername6, kername8 ) \
\
static FUNCPTR_T ftypes[10][BLIS_NUM_FP_TYPES] = \
{ \
/* panel width = 0 */ \
{ \
NULL, \
NULL, \
NULL, \
NULL, \
}, \
/* panel width = 1 */ \
{ \
NULL, \
NULL, \
NULL, \
NULL, \
}, \
/* panel width = 2 */ \
{ \
PASTEMAC(s,kername2), \
PASTEMAC(c,kername2), \
PASTEMAC(d,kername2), \
PASTEMAC(z,kername2), \
}, \
/* panel width = 3 */ \
{ \
NULL, \
NULL, \
NULL, \
NULL, \
}, \
/* panel width = 4 */ \
{ \
PASTEMAC(s,kername4), \
PASTEMAC(c,kername4), \
PASTEMAC(d,kername4), \
PASTEMAC(z,kername4), \
}, \
/* panel width = 5 */ \
{ \
NULL, \
NULL, \
NULL, \
NULL, \
}, \
/* panel width = 6 */ \
{ \
PASTEMAC(s,kername6), \
PASTEMAC(c,kername6), \
PASTEMAC(d,kername6), \
PASTEMAC(z,kername6), \
}, \
/* panel width = 7 */ \
{ \
NULL, \
NULL, \
NULL, \
NULL, \
}, \
/* panel width = 8 */ \
{ \
PASTEMAC(s,kername8), \
PASTEMAC(c,kername8), \
PASTEMAC(d,kername8), \
PASTEMAC(z,kername8), \
}, \
/* panel width = 9 */ \
{ \
NULL, \
NULL, \
NULL, \
NULL, \
} \
};
// Instantiate the table using the kernel names given by the
// UNPACKM_?XK_KERNEL macros (defined outside this file).
GENARRAY( UNPACKM_2XK_KERNEL,
UNPACKM_4XK_KERNEL,
UNPACKM_6XK_KERNEL,
UNPACKM_8XK_KERNEL )
#undef GENTFUNC
#define GENTFUNC( ctype, ch, opname, varname ) \
#define GENTFUNC( ctype, ch, opname, copyvker ) \
\
void PASTEMAC(ch,varname)( \
conj_t conjp, \
dim_t m, \
dim_t n, \
void* beta, \
void* p, inc_t ldp, \
void* a, inc_t inca, inc_t lda \
) \
void PASTEMAC(ch,opname)( \
conj_t conjp, \
dim_t m, \
dim_t n, \
void* beta, \
void* p, inc_t ldp, \
void* a, inc_t inca, inc_t lda \
) \
{ \
dim_t panel_dim; \
num_t dt; \
@@ -145,6 +154,17 @@ void PASTEMAC(ch,varname)( \
\
/* Index into the array to extract the correct function pointer. */ \
f = ftypes[panel_dim][dt]; \
\
/* If the panel dimension is unit, then we recognize that this allows
the kernel to reduce to a copyv, so we call that kernel directly. */ \
if ( m == 1 ) \
{ \
PASTEMAC2(ch,ch,copyvker)( conjp, \
n, \
p, 1, \
a, lda ); \
return; \
} \
\
/* If there exists a kernel implementation for the panel dimension
provided, and the "width" of the panel is equal to the leading
@@ -174,5 +194,5 @@ void PASTEMAC(ch,varname)( \
} \
}
INSERT_GENTFUNC_BASIC( unpackm_cxk, unpackm_cxk )
INSERT_GENTFUNC_BASIC( unpackm_cxk, COPYV_KERNEL )

View File

@@ -32,9 +32,11 @@
*/
// Include headers for various unpackm micro-kernels.
#include "bl2_unpackm_2xk.h"
#include "bl2_unpackm_4xk.h"
// Include headers for various unpackm kernels.
#include "bl2_unpackm_ref_2xk.h"
#include "bl2_unpackm_ref_4xk.h"
#include "bl2_unpackm_ref_6xk.h"
#include "bl2_unpackm_ref_8xk.h"
#undef GENTPROT

View File

@@ -45,6 +45,8 @@ void PASTEMAC(ch,varname)( \
void* a, inc_t inca, inc_t lda \
) \
{ \
const inc_t ldp = 2; \
\
ctype* restrict beta_cast = beta; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
@@ -58,7 +60,7 @@ void PASTEMAC(ch,varname)( \
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
\
pi1 += 2; \
pi1 += ldp; \
alpha1 += lda; \
} \
} \
@@ -69,7 +71,7 @@ void PASTEMAC(ch,varname)( \
PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
\
pi1 += 2; \
pi1 += ldp; \
alpha1 += lda; \
} \
} \
@@ -83,7 +85,7 @@ void PASTEMAC(ch,varname)( \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
\
pi1 += 2; \
pi1 += ldp; \
alpha1 += lda; \
} \
} \
@@ -94,12 +96,12 @@ void PASTEMAC(ch,varname)( \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
\
pi1 += 2; \
pi1 += ldp; \
alpha1 += lda; \
} \
} \
} \
}
INSERT_GENTFUNC_BASIC( unpackm_2xk, unpackm_2xk )
INSERT_GENTFUNC_BASIC( unpackm_ref_4xk, unpackm_ref_4xk )

View File

@@ -43,4 +43,4 @@ void PASTEMAC(ch,varname)( \
void* a, inc_t inca, inc_t lda \
);
INSERT_GENTPROT_BASIC( unpackm_2xk )
INSERT_GENTPROT_BASIC( unpackm_ref_2xk )

View File

@@ -45,6 +45,8 @@ void PASTEMAC(ch,varname)( \
void* a, inc_t inca, inc_t lda \
) \
{ \
const inc_t ldp = 4; \
\
ctype* restrict beta_cast = beta; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
@@ -60,7 +62,7 @@ void PASTEMAC(ch,varname)( \
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
\
pi1 += 4; \
pi1 += ldp; \
alpha1 += lda; \
} \
} \
@@ -73,7 +75,7 @@ void PASTEMAC(ch,varname)( \
PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
\
pi1 += 4; \
pi1 += ldp; \
alpha1 += lda; \
} \
} \
@@ -89,7 +91,7 @@ void PASTEMAC(ch,varname)( \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
\
pi1 += 4; \
pi1 += ldp; \
alpha1 += lda; \
} \
} \
@@ -102,12 +104,12 @@ void PASTEMAC(ch,varname)( \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
\
pi1 += 4; \
pi1 += ldp; \
alpha1 += lda; \
} \
} \
} \
}
INSERT_GENTFUNC_BASIC( unpackm_4xk, unpackm_4xk )
INSERT_GENTFUNC_BASIC( unpackm_ref_4xk, unpackm_ref_4xk )

View File

@@ -43,4 +43,4 @@ void PASTEMAC(ch,varname)( \
void* a, inc_t inca, inc_t lda \
);
INSERT_GENTPROT_BASIC( unpackm_4xk )
INSERT_GENTPROT_BASIC( unpackm_ref_4xk )

View File

@@ -45,6 +45,8 @@ void PASTEMAC(ch,varname)( \
void* a, inc_t inca, inc_t lda \
) \
{ \
const inc_t ldp = 6; \
\
ctype* restrict beta_cast = beta; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
@@ -59,8 +61,10 @@ void PASTEMAC(ch,varname)( \
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
\
pi1 += 4; \
pi1 += ldp; \
alpha1 += lda; \
} \
} \
@@ -72,8 +76,10 @@ void PASTEMAC(ch,varname)( \
PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
\
pi1 += 4; \
pi1 += ldp; \
alpha1 += lda; \
} \
} \
@@ -88,8 +94,10 @@ void PASTEMAC(ch,varname)( \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
\
pi1 += 4; \
pi1 += ldp; \
alpha1 += lda; \
} \
} \
@@ -101,13 +109,15 @@ void PASTEMAC(ch,varname)( \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
\
pi1 += 4; \
pi1 += ldp; \
alpha1 += lda; \
} \
} \
} \
}
INSERT_GENTFUNC_BASIC( unpackm_4xk, unpackm_4xk )
INSERT_GENTFUNC_BASIC( unpackm_ref_6xk, unpackm_ref_6xk )

View File

@@ -43,4 +43,4 @@ void PASTEMAC(ch,varname)( \
void* a, inc_t inca, inc_t lda \
);
INSERT_GENTPROT_BASIC( unpackm_2xk )
INSERT_GENTPROT_BASIC( unpackm_ref_6xk )

View File

@@ -45,6 +45,8 @@ void PASTEMAC(ch,varname)( \
void* a, inc_t inca, inc_t lda \
) \
{ \
const inc_t ldp = 8; \
\
ctype* restrict beta_cast = beta; \
ctype* restrict pi1 = p; \
ctype* restrict alpha1 = a; \
@@ -57,8 +59,14 @@ void PASTEMAC(ch,varname)( \
{ \
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
\
pi1 += 2; \
pi1 += ldp; \
alpha1 += lda; \
} \
} \
@@ -68,8 +76,14 @@ void PASTEMAC(ch,varname)( \
{ \
PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \
\
pi1 += 2; \
pi1 += ldp; \
alpha1 += lda; \
} \
} \
@@ -82,8 +96,14 @@ void PASTEMAC(ch,varname)( \
{ \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
\
pi1 += 2; \
pi1 += ldp; \
alpha1 += lda; \
} \
} \
@@ -93,13 +113,19 @@ void PASTEMAC(ch,varname)( \
{ \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \
\
pi1 += 2; \
pi1 += ldp; \
alpha1 += lda; \
} \
} \
} \
}
INSERT_GENTFUNC_BASIC( unpackm_2xk, unpackm_2xk )
INSERT_GENTFUNC_BASIC( unpackm_ref_6xk, unpackm_ref_6xk )

View File

@@ -43,4 +43,4 @@ void PASTEMAC(ch,varname)( \
void* a, inc_t inca, inc_t lda \
);
INSERT_GENTPROT_BASIC( unpackm_4xk )
INSERT_GENTPROT_BASIC( unpackm_ref_8xk )

View File

@@ -1,97 +0,0 @@
#!/bin/bash
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2012, The University of Texas
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name of The University of Texas nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
#
# remove_old_link: remove the file or symlink named $1 from the current
# directory, if present. The -L test is needed in addition to -f because
# -f follows symlinks: a dangling link (one whose target is gone) would
# otherwise be left in place and make the later 'ln -s' fail.
remove_old_link()
{
	if [ -f "$1" ] || [ -L "$1" ]; then
		echo "$0: removing $1"
		rm -f "$1"
	else
		echo "$0: File $1 does not exist. Skipping removal."
	fi
}

# link_new_file: create a symlink named $2 that points at the file $1,
# provided $1 exists. If $3 is "touch", the source file is also touched
# after linking (presumably to make the build treat it as modified --
# TODO confirm).
link_new_file()
{
	if [ ! -f "$1" ]; then
		echo "$0: Attempted to symlink file $1, which does not exist! Skipping."
	elif [ "$3" = "touch" ]; then
		echo "$0: symlinking (and touching) $1"
		ln -s "$1" "$2"
		touch "$1"
	else
		echo "$0: symlinking $1"
		ln -s "$1" "$2"
	fi
}

# main: point the kernel symlinks in the current directory at the
# implementations found in the architecture directory given as $1.
#
# Usage:
#   Example 1: ./set_symlink.sh generic
#   Example 2: ./set_symlink.sh x86
#
# Exits with status 1 if no directory is given or it does not exist.
main()
{
	arch_dir="$1"

	if [ -z "${arch_dir}" ]; then
		echo "Usage: $0 arch_dir"
		exit 1
	fi

	if [ ! -d "${arch_dir}" ]; then
		echo "$0: Directory for architecture ${arch_dir} does not exist!. Exiting."
		exit 1
	fi

	kernels="bl2_unpackm_2xk bl2_unpackm_4xk"

	# ${kernels} is intentionally unquoted so that it splits into words.
	for t in ${kernels}; do

		# Remove any existing links for the current kernel type.
		remove_old_link "${t}.c"
		remove_old_link "${t}.h"

		# Link in the files for the current kernel type from the desired
		# directory. Only the .c file is touched.
		link_new_file "${arch_dir}/${t}.c" "${t}.c" touch
		link_new_file "${arch_dir}/${t}.h" "${t}.h"
	done
}
main "$@"

View File

@@ -37,24 +37,18 @@
#define FUNCPTR_T machval_fp
typedef void (*FUNCPTR_T)(
machval_t machval,
void* val
machval_t mval,
void* v
);
// Manually initialize a function pointer array.
static FUNCPTR_T ftypes[BLIS_NUM_FP_TYPES] =
{
bl2_smachval,
NULL,
bl2_dmachval,
NULL
};
static FUNCPTR_T GENARRAY(ftypes,machval);
//
// Define object-based interface.
//
void bl2_machval( machval_t machval,
void bl2_machval( machval_t mval,
obj_t* v )
{
num_t dt_v = bl2_obj_datatype( *v );
@@ -67,7 +61,7 @@ void bl2_machval( machval_t machval,
f = ftypes[dt_v];
// Invoke the function.
f( machval,
f( mval,
buf_v );
}
@@ -75,34 +69,37 @@ void bl2_machval( machval_t machval,
//
// Define BLAS-like interfaces.
//
#undef GENTFUNC
#define GENTFUNC3( ctype, ctype_r, ch, chr, opname, varname ) \
#undef GENTFUNC1R
#define GENTFUNC1R( ctype, ctype_r, ch, chr, opname, varname ) \
\
void PASTEMAC(ch,opname)( \
machvar_t machval, \
ctype* val, \
machval_t mval, \
void* v \
) \
{ \
static ctype_r pvals[ BLIS_NUM_MACH_PARAMS ]; \
\
static bool_t first_time = TRUE; \
dim_t val_i = machval - BLIS_MACH_PARAM_FIRST; \
\
dim_t val_i = mval - BLIS_MACH_PARAM_FIRST; \
ctype* v_cast = v; \
\
/* If this is the first time through, call the underlying
code to discover each machine parameter. */ \
if ( first_time ) \
{ \
char lapack_machval; \
dim_t i; \
char lapack_mval; \
dim_t m, i; \
\
for( m = BLIS_MACH_PARAM_FIRST, i = 0; \
m <= BLIS_MACH_PARAM_LAST; \
++m, ++i ) \
for( i = 0, m = BLIS_MACH_PARAM_FIRST; \
i < BLIS_NUM_MACH_PARAMS - 1; \
++i, ++m ) \
{ \
bl2_param_map_to_netlib_machval( m, &lapack_machval ); \
bl2_param_map_blis_to_netlib_machval( m, &lapack_mval ); \
\
/*printf( "bl2_machval: querying %u %c\n", m, lapack_machval );*/ \
/*printf( "bl2_machval: querying %u %c\n", m, lapack_mval );*/ \
\
pvals[i] = PASTEMAC(chr,varname)( &lapack_machval, 1 ); \
pvals[i] = PASTEMAC(chr,varname)( &lapack_mval, 1 ); \
\
/*printf( "bl2_machval: got back %34.29e\n", pvals[i] ); */ \
} \
@@ -116,12 +113,11 @@ void PASTEMAC(ch,opname)( \
/* Copy the requested parameter value to the output buffer, which
may involve a demotion from the complex to real domain. */ \
PASTEMAC2(chr,ch,copys)( pvals[ val_i ], \
*val ); \
*v_cast ); \
}
GENTFUNC( float, float, s, s, machval, lamch )
GENTFUNC( double, double, d, d, machval, lamch )
GENTFUNC( scomplex, float, c, s, machval, lamch )
GENTFUNC( dcomplex, double, z, d, machval, lamch )
GENTFUNC1R( float, float, s, s, machval, lamch )
GENTFUNC1R( double, double, d, d, machval, lamch )
GENTFUNC1R( scomplex, float, c, s, machval, lamch )
GENTFUNC1R( dcomplex, double, z, d, machval, lamch )

View File

@@ -32,10 +32,14 @@
*/
#include "bl2_lsame.h"
#include "bl2_slamch.h"
#include "bl2_dlamch.h"
//
// Prototype object-based interface.
//
void bl2_machval( machval_t machval,
void bl2_machval( machval_t mval,
obj_t* v );
@@ -46,8 +50,8 @@ void bl2_machval( machval_t machval,
#define GENTPROT( ctype, ch, opname ) \
\
void PASTEMAC(ch,opname)( \
machval_t machval, \
ctype* val \
machval_t mval, \
void* v \
);
INSERT_GENTPROT_BASIC( machval )

View File

@@ -49,8 +49,8 @@ void bl2_param_map_blis_to_netlib_trans( trans_t trans, char* blas_trans )
void bl2_param_map_blis_to_netlib_uplo( uplo_t uplo, char* blas_uplo )
{
if ( uplo == BLIS_LOWER_TRIANGULAR ) *blas_uplo = 'L';
else if ( uplo == BLIS_UPPER_TRIANGULAR ) *blas_uplo = 'U';
if ( uplo == BLIS_LOWER ) *blas_uplo = 'L';
else if ( uplo == BLIS_UPPER ) *blas_uplo = 'U';
else
{
bl2_check_error_code( BLIS_INVALID_UPLO );
@@ -111,8 +111,8 @@ void bl2_param_map_netlib_to_blis_trans( char* trans, trans_t* blis_trans )
void bl2_param_map_netlib_to_blis_uplo( char* uplo, uplo_t* blis_uplo )
{
if ( *uplo == 'l' || *uplo == 'L' ) *blis_uplo = BLIS_LOWER_TRIANGULAR;
else if ( *uplo == 'u' || *uplo == 'U' ) *blis_uplo = BLIS_UPPER_TRIANGULAR;
if ( *uplo == 'l' || *uplo == 'L' ) *blis_uplo = BLIS_LOWER;
else if ( *uplo == 'u' || *uplo == 'U' ) *blis_uplo = BLIS_UPPER;
else
{
bl2_check_error_code( BLIS_INVALID_UPLO );
@@ -156,8 +156,8 @@ void bl2_param_map_char_to_blis_trans( char* trans, trans_t* blis_trans )
void bl2_param_map_char_to_blis_uplo( char* uplo, uplo_t* blis_uplo )
{
if ( *uplo == 'l' || *uplo == 'L' ) *blis_uplo = BLIS_LOWER_TRIANGULAR;
else if ( *uplo == 'u' || *uplo == 'U' ) *blis_uplo = BLIS_UPPER_TRIANGULAR;
if ( *uplo == 'l' || *uplo == 'L' ) *blis_uplo = BLIS_LOWER;
else if ( *uplo == 'u' || *uplo == 'U' ) *blis_uplo = BLIS_UPPER;
else
{
bl2_check_error_code( BLIS_INVALID_UPLO );

View File

@@ -9,6 +9,8 @@ extern "C" {
#include "bl2_f2c.h"
#include "stdio.h"
double bl2_pow_di( doublereal* a, integer* n );
/* Table of constant values */
//static integer c__1 = 1;

View File

@@ -0,0 +1,35 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2012, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Prototype for bl2_dlamch(), which takes a pointer to a character (cmach)
// plus that argument's length (cmach_len) and returns a doublereal.
// NOTE(review): presumably the f2c translation of LAPACK's dlamch
// machine-parameter query, with cmach selecting the parameter -- confirm
// against the implementation.
doublereal bl2_dlamch( char* cmach, ftnlen cmach_len );

View File

@@ -0,0 +1,35 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2012, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Prototype for bl2_lsame(), which takes two character pointers (ca, cb)
// plus their lengths (ca_len, cb_len) and returns a logical.
// NOTE(review): presumably the f2c translation of LAPACK's lsame
// case-insensitive character comparison -- confirm against the
// implementation.
logical bl2_lsame( char* ca, char* cb, ftnlen ca_len, ftnlen cb_len );

View File

@@ -9,6 +9,8 @@ extern "C" {
#include "bl2_f2c.h"
#include "stdio.h"
double bl2_pow_ri( real* a, integer* n );
/* Table of constant values */
//static integer c__1 = 1;

View File

@@ -0,0 +1,35 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2012, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Prototype for bl2_slamch(), which takes a pointer to a character (cmach)
// plus that argument's length (cmach_len) and returns a real.
// NOTE(review): presumably the f2c translation of LAPACK's slamch
// machine-parameter query, with cmach selecting the parameter -- confirm
// against the implementation.
real bl2_slamch( char* cmach, ftnlen cmach_len );

View File

@@ -368,7 +368,7 @@ typedef enum
typedef enum
{
BLIS_MACH_EPS,
BLIS_MACH_EPS = 0,
BLIS_MACH_SFMIN,
BLIS_MACH_BASE,
BLIS_MACH_PREC,

View File

@@ -62,11 +62,8 @@ extern "C" {
#include <stdlib.h>
#include <math.h>
// Handle the results of checking for time.h and sys/time.h.
// gettimeofday() needs this.
#if HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include <sys/time.h>
#include <time.h>
@@ -79,6 +76,8 @@ extern "C" {
#include "bl2_extern_defs.h"
#include "bl2_f2c.h"
// -- Base operation prototypes --

View File

@@ -1 +1 @@
0.1-alpha0
0.1-alpha-0