Deprecated 'flame' configuration.

Details:
- Removed 'flame' configuration, as it was horribly out-of-date.
- Comment changes to bl2_blocksize.c and bl2_mem.c.
This commit is contained in:
Field G. Van Zee
2013-03-22 15:07:01 -05:00
parent bba38cf4e9
commit 718888849c
7 changed files with 4 additions and 463 deletions

View File

@@ -1,172 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2013, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_ARCH_H
#define BLIS_ARCH_H
// Architecture-specific constants for this configuration: element-size
// limits, page size, vector register widths, fusing factors, and the default
// cache and register blocksizes. Most macros come in four flavors with the
// suffixes _S, _D, _C, and _Z, one per floating-point type counted by
// BLIS_NUM_FP_TYPES (presumably single/double real and single/double
// complex -- confirm against the type definitions, which are not in this
// header).
// -- General floating-point constants --
#define BLIS_NUM_FP_TYPES 4
// Size of the dcomplex type, which is declared elsewhere; presumably the
// largest of the supported element types.
#define BLIS_MAX_TYPE_SIZE sizeof(dcomplex)
// -- Maximum offset of an element that might be pre-loaded/prefetched --
#define BLIS_MAX_PREFETCH_BYTE_OFFSET 128
// -- Page size --
#define BLIS_PAGE_SIZE 4096
// -- Number of elements per vector register --
// NOTE(review): 4/2/2/1 would match 16-byte registers if the element sizes
// are 4/8/8/16 bytes -- confirm.
#define BLIS_NUM_ELEM_PER_REG_S 4
#define BLIS_NUM_ELEM_PER_REG_D 2
#define BLIS_NUM_ELEM_PER_REG_C 2
#define BLIS_NUM_ELEM_PER_REG_Z 1
// -- Default fusing factors for level-1 fused operations --
#define BLIS_DEFAULT_FUSING_FACTOR_S 8
#define BLIS_DEFAULT_FUSING_FACTOR_D 4
#define BLIS_DEFAULT_FUSING_FACTOR_C 4
#define BLIS_DEFAULT_FUSING_FACTOR_Z 2
// -- Default cache blocksizes --
// Constraints:
//
// (1) MC must be a multiple of:
//     (a) MR (for zero-padding purposes) and
//     (b) NR.
// (2) NC must be a multiple of:
//     (a) NR (for zero-padding purposes) and
//     (b) MR.
// (3) KC does not need to be a multiple of anything, unless the micro-kernel
//     specifically requires it (and typically it does not).
//
// NOTE: For BLIS libraries built on block-panel macro-kernels, constraint
// (2b) is relaxed. In this case, (1b) is needed for operation implementations
// involving matrices with diagonals (trmm, trsm). In these cases, we want
// every panel of packed matrix A to have a diagonal offset that is a multiple
// of MR. If, instead, the library were to be built on panel-block
// macro-kernels, matrix B would be the one with structure, not A, and thus it
// would be constraint (2b) that would be needed instead of (1b).
//
#define BLIS_DEFAULT_MC_S 128
#define BLIS_DEFAULT_KC_S 256
#define BLIS_DEFAULT_NC_S 8192
#define BLIS_DEFAULT_MC_D 128
#define BLIS_DEFAULT_KC_D 256
#define BLIS_DEFAULT_NC_D 8192
#define BLIS_DEFAULT_MC_C 128
#define BLIS_DEFAULT_KC_C 256
#define BLIS_DEFAULT_NC_C 8192
#define BLIS_DEFAULT_MC_Z 128
#define BLIS_DEFAULT_KC_Z 256
#define BLIS_DEFAULT_NC_Z 8192
// -- Default register blocksizes for inner kernel --
// These are the MR and NR values that the cache blocksize constraints above
// refer to.
#define BLIS_DEFAULT_MR_S 8
#define BLIS_DEFAULT_NR_S 2
#define BLIS_DEFAULT_MR_D 4
#define BLIS_DEFAULT_NR_D 4
#define BLIS_DEFAULT_MR_C 4
#define BLIS_DEFAULT_NR_C 1
#define BLIS_DEFAULT_MR_Z 2
#define BLIS_DEFAULT_NR_Z 1
// NOTE: If the micro-kernel, which is typically unrolled by a factor
// of f, handles leftover edge cases (i.e., when k % f > 0), then these
// register blocksizes in the k dimension can be defined to 1.
#define BLIS_DEFAULT_KR_S 1
#define BLIS_DEFAULT_KR_D 1
#define BLIS_DEFAULT_KR_C 1
#define BLIS_DEFAULT_KR_Z 1
// -- Default switch for duplication of B --
// NOTE: If BLIS_DEFAULT_DUPLICATE_B is set to FALSE, then the
// NUM_DUPL definitions are not used.
// Each NUM_DUPL value is defined as the number of elements per vector
// register for the same type.
#define BLIS_DEFAULT_DUPLICATE_B TRUE
#define BLIS_DEFAULT_NUM_DUPL_S BLIS_NUM_ELEM_PER_REG_S
#define BLIS_DEFAULT_NUM_DUPL_D BLIS_NUM_ELEM_PER_REG_D
#define BLIS_DEFAULT_NUM_DUPL_C BLIS_NUM_ELEM_PER_REG_C
#define BLIS_DEFAULT_NUM_DUPL_Z BLIS_NUM_ELEM_PER_REG_Z
// -- Default incremental packing blocksizes (n dimension) --
// NOTE: These incremental packing blocksizes (for the n dimension) are only
// used by certain blocked variants. But when they *are* used, they MUST be
// an integer multiple of NR! Defining each one as BLIS_DEFAULT_NI_FAC times
// the corresponding NR guarantees this by construction.
#define BLIS_DEFAULT_NI_FAC 16
#define BLIS_DEFAULT_NI_S (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_S)
#define BLIS_DEFAULT_NI_D (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_D)
#define BLIS_DEFAULT_NI_C (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_C)
#define BLIS_DEFAULT_NI_Z (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_Z)
// -- Default register blocksizes for vectors --
// NOTE: Register blocksizes for vectors are used when packing
// non-contiguous vectors. Similar to that of KR, they can
// typically be set to 1.
#define BLIS_DEFAULT_VR_S 1
#define BLIS_DEFAULT_VR_D 1
#define BLIS_DEFAULT_VR_C 1
#define BLIS_DEFAULT_VR_Z 1
#endif

View File

@@ -1,57 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2013, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef BLIS_CONFIG_H
#define BLIS_CONFIG_H
// General build-time switches for this configuration.

// Declaration for posix_memalign() needs this.
// The definition is guarded so that a build which already defines
// _GNU_SOURCE (on the compiler command line or in an earlier header) does
// not get a macro-redefinition diagnostic.
// NOTE: a feature-test macro only takes effect if it is defined before the
// first system header is included, so this header must come first.
#ifndef _GNU_SOURCE
#define _GNU_SOURCE 1
#endif
// Enable memory alignment?
#define BLIS_ENABLE_MEMORY_ALIGNMENT 1
// Memory alignment boundary (presumably in bytes -- confirm against the
// allocation code). The default of 16 applies only when the build has not
// already defined BLIS_MEMORY_ALIGNMENT_BOUNDARY.
#ifndef BLIS_MEMORY_ALIGNMENT_BOUNDARY
#define BLIS_MEMORY_ALIGNMENT_BOUNDARY 16
#endif
// Enable mixed domain operations? (Disabled: the macro is left undefined.)
//#define BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
// Enable extra mixed precision operations? (Disabled: the macro is left
// undefined.)
//#define BLIS_ENABLE_MIXED_PRECISION_SUPPORT
#endif

View File

@@ -1,131 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2013, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Kernel selection for this configuration: each *_KERNEL / *_UKERNEL macro
// below names the implementation used for one operation. The level-3 section
// selects "opt" and "ref" 4x2 kernels; the level-1f and level-1 sections
// select "unb_var1" names, and the includes for their "opt" counterparts are
// commented out.
// -- LEVEL-3 KERNEL DEFINITIONS -----------------------------------------------
// NOTE(review): the kernel names carry a 4x2 suffix, while the arch header of
// this configuration defines BLIS_DEFAULT_MR_D / BLIS_DEFAULT_NR_D as 4 / 4 --
// confirm which datatype and register blocking these kernels expect.
#include "bl2_gemm_opt_4x2.h"
#include "bl2_gemmtrsm_l_opt_4x2.h"
#include "bl2_gemmtrsm_u_opt_4x2.h"
// -- dupl --
// No header for dupl_opt_var1 is included here; presumably it is declared
// elsewhere.
#define DUPL_KERNEL dupl_opt_var1
// -- gemm --
#define GEMM_UKERNEL gemm_opt_4x2
// -- trsm-related --
// The fused gemmtrsm micro-kernels use the "opt" implementations included
// above; the standalone trsm micro-kernels use "ref" names for which no
// header is included in this file.
#define GEMMTRSM_L_UKERNEL gemmtrsm_l_opt_4x2
#define GEMMTRSM_U_UKERNEL gemmtrsm_u_opt_4x2
#define TRSM_L_UKERNEL trsm_l_ref_4x2
#define TRSM_U_UKERNEL trsm_u_ref_4x2
// -- LEVEL-1F KERNEL DEFINITIONS ----------------------------------------------
// The optimized level-1f headers are disabled; the macros below select the
// unb_var1 names instead.
//#include "bl2_axpy2v_opt_var1.h"
//#include "bl2_dotaxpyv_opt_var1.h"
//#include "bl2_axpyf_opt_var1.h"
//#include "bl2_dotxf_opt_var1.h"
//#include "bl2_dotxaxpyf_opt_var1.h"
// -- axpy2v --
#define AXPY2V_KERNEL axpy2v_unb_var1
// -- dotaxpyv --
#define DOTAXPYV_KERNEL dotaxpyv_unb_var1
// -- axpyf --
#define AXPYF_KERNEL axpyf_unb_var1
// -- dotxf --
#define DOTXF_KERNEL dotxf_unb_var1
// -- dotxaxpyf --
#define DOTXAXPYF_KERNEL dotxaxpyf_unb_var1
// -- LEVEL-1 KERNEL DEFINITIONS -----------------------------------------------
// The optimized level-1 headers are disabled; the macros below select the
// unb_var1 names instead.
//#include "bl2_axpyv_opt_var1.h"
//#include "bl2_dotv_opt_var1.h"
// -- axpyv --
#define AXPYV_KERNEL axpyv_unb_var1
// -- copynzv --
#define COPYNZV_KERNEL copynzv_unb_var1
// -- copyv --
#define COPYV_KERNEL copyv_unb_var1
// -- dotv --
#define DOTV_KERNEL dotv_unb_var1
// -- dotxv --
#define DOTXV_KERNEL dotxv_unb_var1
// -- invertv --
#define INVERTV_KERNEL invertv_unb_var1
// -- scal2v --
#define SCAL2V_KERNEL scal2v_unb_var1
// -- scalv --
#define SCALV_KERNEL scalv_unb_var1
// -- setv --
#define SETV_KERNEL setv_unb_var1

View File

@@ -1 +0,0 @@
../../kernels/x86

View File

@@ -1,98 +0,0 @@
#!/bin/bash
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2013, The University of Texas
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name of The University of Texas nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# Only include this block of code once.
# Build definitions for this configuration: configure-time switches, utility
# programs, and the compiler, archiver, and linker settings. The include
# guard below ensures the definitions are only evaluated once.
ifndef MAKE_DEFS_MK_INCLUDED
MAKE_DEFS_MK_INCLUDED := yes
#
# --- Build definitions --------------------------------------------------------
#
# Variables corresponding to other configure-time options.
# As set here: quiet make output, static library build enabled, dynamic
# library build disabled.
BLIS_ENABLE_VERBOSE_MAKE_OUTPUT := no
BLIS_ENABLE_STATIC_BUILD := yes
BLIS_ENABLE_DYNAMIC_BUILD := no
#
# --- Utility program definitions ----------------------------------------------
#
# Shell and file-manipulation commands, defined once here so they can be
# changed in a single place.
SH := /bin/sh
MV := mv
MKDIR := mkdir -p
RM_F := rm -f
RM_RF := rm -rf
SYMLINK := ln -sf
FIND := find
XARGS := xargs
RANLIB := ranlib
INSTALL := install -c
#
# --- Development tools definitions --------------------------------------------
#
# --- Determine the C compiler and related flags ---
# Flags are split by purpose (preprocessor, miscellaneous, debug, warning,
# optimization, vectorization) and aggregated further below. Flags that
# appear after a '#' on the same line are commented out and not in effect.
CC := gcc
CPPROCFLAGS :=
CMISCFLAGS := -std=c99 # -fopenmp -pg
CDBGFLAGS := -g
CWARNFLAGS := -Wall # -Wno-comment
COPTFLAGS := -O2 -malign-double
CVECFLAGS := -msse3 -march=native # -mfpmath=sse
# Aggregate all of the flags into two groups: one for optimizable code, and
# one for code that should not be optimized.
# CFLAGS_NOOPT is the same as CFLAGS minus COPTFLAGS and CVECFLAGS.
CFLAGS := $(CDBGFLAGS) $(COPTFLAGS) $(CVECFLAGS) $(CWARNFLAGS) $(CMISCFLAGS) $(CPPROCFLAGS)
CFLAGS_NOOPT := $(CDBGFLAGS) $(CWARNFLAGS) $(CMISCFLAGS) $(CPPROCFLAGS)
# --- Determine the archiver and related flags ---
# ar modifiers: c = create the archive without warning, r = insert or replace
# members, u = only replace members that are newer than the archived copy.
AR := ar
ARFLAGS := cru
# --- Determine the linker and related flags ---
# Linking is done through the C compiler driver; no extra linker flags.
LINKER := $(CC)
LDFLAGS :=
# end of ifndef MAKE_DEFS_MK_INCLUDED conditional block
endif

View File

@@ -94,7 +94,7 @@ dim_t bl2_determine_blocksize_f( dim_t i,
// to bottom-right).
// Extract the execution datatype and use it to query the corresponding
// blocksize.
// blocksize value from the blksz_t object.
dt = bl2_obj_execution_datatype( *obj );
b_alg = bl2_blksz_for_type( dt, b );
@@ -120,7 +120,7 @@ dim_t bl2_determine_blocksize_b( dim_t i,
// to top-left).
// Extract the execution datatype and use it to query the corresponding
// blocksize.
// blocksize value from the blksz_t object.
dt = bl2_obj_execution_datatype( *obj );
b_alg = bl2_blksz_for_type( dt, b );

View File

@@ -322,8 +322,8 @@ void bl2_mem_init_pool( char* pool_mem,
}
// Step through the memory pool, beginning with the page-aligned address
// determined above, assigning pointers to the beginning of each m x n
// block to the ith element of the block_ptrs array.
// determined above, assigning pointers to the beginning of each block_size
// bytes to the ith element of the block_ptrs array.
for ( i = 0; i < num_blocks; ++i )
{
// Save the address of pool, which is guaranteed to be page-aligned.