More tweaks to _config.h, _kernel.h; smem tweaks.

Details:
- Moved kernel-related definitions from bl2_config.h to bl2_kernel.h.
- Replaced #define of _GNU_SOURCE with #define of _POSIX_C_SOURCE. This
  accomplishes the same thing (enabling posix_memalign()) without enabling
  all of the GNU extensions we don't need.
- Defined the size of the static memory pool in terms of MC, KC, and NC,
  as well as two new constants that determine how many MCxKC blocks and
  how many KCxNC blocks should be allocated (defined in bl2_config.h).
- In the case of static memory pool exhaustion, replaced the generic
  bl2_abort() with a specific error code call.
This commit is contained in:
Field G. Van Zee
2012-12-18 14:34:02 -06:00
parent 5d8bdb21c4
commit 6fbbdd4e19
10 changed files with 335 additions and 291 deletions

View File

@@ -38,8 +38,9 @@
// -- OPERATING SYSTEM ---------------------------------------------------------
// Declaration for posix_memalign() needs this.
#define _GNU_SOURCE 1
// Enable IEEE Standard 1003.1-2004 (POSIX.1-2001, 2004 edition).
// NOTE: This is needed to enable posix_memalign().
#define _POSIX_C_SOURCE 200112L
@@ -60,8 +61,10 @@
#define BLIS_MEMORY_ALIGNMENT_BOUNDARY 16
#endif
// Static memory pool size.
#define BLIS_STATIC_MEM_POOL_SIZE (256 * 5000 * sizeof(double))
// The number of MC x KC and KC x NC blocks to reserve in the static memory
// pool.
#define BLIS_NUM_MC_X_KC_BLOCKS 2
#define BLIS_NUM_KC_X_NC_BLOCKS 1
// The page size is used by the memory allocator so that static memory
// can be allocated with alignment to the beginning of a page boundary.
@@ -87,141 +90,4 @@
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
// -- Default cache blocksizes --
// Constraints:
//
// (1) MC must be a multiple of:
// (a) MR (for zero-padding purposes) and
// (b) NR.
// (2) NC must be a multiple of
// (a) NR (for zero-padding purposes) and
// (b) MR.
// (3) KC does not need to be multiple of anything, unless the micro-kernel
// specifically requires it (and typically it does not).
//
// NOTE: For BLIS libraries built on block-panel macro-kernels, constraint
// (2b) is relaxed. In this case, (1b) is needed for operation implementations
// involving matrices with diagonals (trmm, trsm). In these cases, we want the
// diagonal offset of any panel of packed matrix A to be a multiple of MR.
// If, instead, the library were to be built on panel-block macro-kernels,
// matrix B would be the one with structure, not A, and thus it would be
// constraint (2b) that would be needed instead of (1b).
//
#define BLIS_DEFAULT_MC_S 128
#define BLIS_DEFAULT_KC_S 256
#define BLIS_DEFAULT_NC_S 8192
#define BLIS_DEFAULT_MC_D 368
#define BLIS_DEFAULT_KC_D 256
#define BLIS_DEFAULT_NC_D 8192
#define BLIS_DEFAULT_MC_C 128
#define BLIS_DEFAULT_KC_C 256
#define BLIS_DEFAULT_NC_C 8192
#define BLIS_DEFAULT_MC_Z 128
#define BLIS_DEFAULT_KC_Z 256
#define BLIS_DEFAULT_NC_Z 8192
// -- Default register blocksizes for inner kernel --
// NOTE: When using the reference configuration, these register blocksizes
// in the m and n dimensions should all be equal to the size expected by
// the reference micro-kernel(s).
#define BLIS_DEFAULT_MR_S 4
#define BLIS_DEFAULT_NR_S 4
#define BLIS_DEFAULT_MR_D 4
#define BLIS_DEFAULT_NR_D 4
#define BLIS_DEFAULT_MR_C 4
#define BLIS_DEFAULT_NR_C 4
#define BLIS_DEFAULT_MR_Z 4
#define BLIS_DEFAULT_NR_Z 4
// NOTE: If the micro-kernel, which is typically unrolled to a factor
// of f, handles leftover edge cases (ie: when k % f > 0) then these
// register blocksizes in the k dimension can be defined to 1.
#define BLIS_DEFAULT_KR_S 1
#define BLIS_DEFAULT_KR_D 1
#define BLIS_DEFAULT_KR_C 1
#define BLIS_DEFAULT_KR_Z 1
// -- Number of elements per vector register --
// NOTE: These constants are typically only used to determine the amount
// of duplication needed when configuring level-3 macro-kernels that
// copy and duplicate elements of B to a temporary duplication buffer
// (so that element-wise vector multiplication and addition instructions
// can be used).
#define BLIS_NUM_ELEM_PER_REG_S 4
#define BLIS_NUM_ELEM_PER_REG_D 2
#define BLIS_NUM_ELEM_PER_REG_C 2
#define BLIS_NUM_ELEM_PER_REG_Z 1
// -- Default switch for duplication of B --
// NOTE: If BLIS_DEFAULT_DUPLICATE_B is set to FALSE, then the
// NUM_DUPL definitions are not used.
//#define BLIS_DEFAULT_DUPLICATE_B TRUE
#define BLIS_DEFAULT_DUPLICATE_B FALSE
#define BLIS_DEFAULT_NUM_DUPL_S BLIS_NUM_ELEM_PER_REG_S
#define BLIS_DEFAULT_NUM_DUPL_D BLIS_NUM_ELEM_PER_REG_D
#define BLIS_DEFAULT_NUM_DUPL_C BLIS_NUM_ELEM_PER_REG_C
#define BLIS_DEFAULT_NUM_DUPL_Z BLIS_NUM_ELEM_PER_REG_Z
// -- Default incremental packing blocksizes (n dimension) --
// NOTE: These incremental packing blocksizes (for the n dimension) are only
// used by certain blocked variants. But when they *are* used, they MUST be
// an integer multiple of NR!
#define BLIS_DEFAULT_NI_FAC 16
#define BLIS_DEFAULT_NI_S (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_S)
#define BLIS_DEFAULT_NI_D (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_D)
#define BLIS_DEFAULT_NI_C (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_C)
#define BLIS_DEFAULT_NI_Z (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_Z)
// -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------
// -- Default fusing factors for level-1f operations --
// NOTE: Default fusing factors are not used by the reference implementations
// of level-1f operations. They are here only for use when these operations
// are optimized.
#define BLIS_DEFAULT_FUSING_FACTOR_S 8
#define BLIS_DEFAULT_FUSING_FACTOR_D 4
#define BLIS_DEFAULT_FUSING_FACTOR_C 4
#define BLIS_DEFAULT_FUSING_FACTOR_Z 2
// -- LEVEL-1V KERNEL CONSTANTS ------------------------------------------------
// -- Default register blocksizes for vectors --
// NOTE: Register blocksizes for vectors are used when packing
// non-contiguous vectors. Similar to that of KR, they can
// typically be set to 1.
#define BLIS_DEFAULT_VR_S 1
#define BLIS_DEFAULT_VR_D 1
#define BLIS_DEFAULT_VR_C 1
#define BLIS_DEFAULT_VR_Z 1
#endif

View File

@@ -32,6 +32,146 @@
*/
#ifndef BLIS_KERNEL_H
#define BLIS_KERNEL_H
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
// -- Default cache blocksizes --
// Constraints:
//
// (1) MC must be a multiple of:
// (a) MR (for zero-padding purposes) and
// (b) NR.
// (2) NC must be a multiple of
// (a) NR (for zero-padding purposes) and
// (b) MR.
// (3) KC does not need to be multiple of anything, unless the micro-kernel
// specifically requires it (and typically it does not).
//
// NOTE: For BLIS libraries built on block-panel macro-kernels, constraint
// (2b) is relaxed. In this case, (1b) is needed for operation implementations
// involving matrices with diagonals (trmm, trsm). In these cases, we want the
// diagonal offset of any panel of packed matrix A to be a multiple of MR.
// If, instead, the library were to be built on panel-block macro-kernels,
// matrix B would be the one with structure, not A, and thus it would be
// constraint (2b) that would be needed instead of (1b).
//
#define BLIS_DEFAULT_MC_S 128
#define BLIS_DEFAULT_KC_S 256
#define BLIS_DEFAULT_NC_S 8192
#define BLIS_DEFAULT_MC_D 368
#define BLIS_DEFAULT_KC_D 256
#define BLIS_DEFAULT_NC_D 8192
#define BLIS_DEFAULT_MC_C 128
#define BLIS_DEFAULT_KC_C 256
#define BLIS_DEFAULT_NC_C 8192
#define BLIS_DEFAULT_MC_Z 128
#define BLIS_DEFAULT_KC_Z 256
#define BLIS_DEFAULT_NC_Z 8192
// -- Default register blocksizes for inner kernel --
// NOTE: When using the reference configuration, these register blocksizes
// in the m and n dimensions should all be equal to the size expected by
// the reference micro-kernel(s).
#define BLIS_DEFAULT_MR_S 4
#define BLIS_DEFAULT_NR_S 4
#define BLIS_DEFAULT_MR_D 4
#define BLIS_DEFAULT_NR_D 4
#define BLIS_DEFAULT_MR_C 4
#define BLIS_DEFAULT_NR_C 4
#define BLIS_DEFAULT_MR_Z 4
#define BLIS_DEFAULT_NR_Z 4
// NOTE: If the micro-kernel, which is typically unrolled to a factor
// of f, handles leftover edge cases (ie: when k % f > 0) then these
// register blocksizes in the k dimension can be defined to 1.
#define BLIS_DEFAULT_KR_S 1
#define BLIS_DEFAULT_KR_D 1
#define BLIS_DEFAULT_KR_C 1
#define BLIS_DEFAULT_KR_Z 1
// -- Number of elements per vector register --
// NOTE: These constants are typically only used to determine the amount
// of duplication needed when configuring level-3 macro-kernels that
// copy and duplicate elements of B to a temporary duplication buffer
// (so that element-wise vector multiplication and addition instructions
// can be used).
#define BLIS_NUM_ELEM_PER_REG_S 4
#define BLIS_NUM_ELEM_PER_REG_D 2
#define BLIS_NUM_ELEM_PER_REG_C 2
#define BLIS_NUM_ELEM_PER_REG_Z 1
// -- Default switch for duplication of B --
// NOTE: If BLIS_DEFAULT_DUPLICATE_B is set to FALSE, then the
// NUM_DUPL definitions are not used.
//#define BLIS_DEFAULT_DUPLICATE_B TRUE
#define BLIS_DEFAULT_DUPLICATE_B FALSE
#define BLIS_DEFAULT_NUM_DUPL_S BLIS_NUM_ELEM_PER_REG_S
#define BLIS_DEFAULT_NUM_DUPL_D BLIS_NUM_ELEM_PER_REG_D
#define BLIS_DEFAULT_NUM_DUPL_C BLIS_NUM_ELEM_PER_REG_C
#define BLIS_DEFAULT_NUM_DUPL_Z BLIS_NUM_ELEM_PER_REG_Z
// -- Default incremental packing blocksizes (n dimension) --
// NOTE: These incremental packing blocksizes (for the n dimension) are only
// used by certain blocked variants. But when they *are* used, they MUST be
// an integer multiple of NR!
#define BLIS_DEFAULT_NI_FAC 16
#define BLIS_DEFAULT_NI_S (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_S)
#define BLIS_DEFAULT_NI_D (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_D)
#define BLIS_DEFAULT_NI_C (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_C)
#define BLIS_DEFAULT_NI_Z (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_Z)
// -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------
// -- Default fusing factors for level-1f operations --
// NOTE: Default fusing factors are not used by the reference implementations
// of level-1f operations. They are here only for use when these operations
// are optimized.
#define BLIS_DEFAULT_FUSING_FACTOR_S 8
#define BLIS_DEFAULT_FUSING_FACTOR_D 4
#define BLIS_DEFAULT_FUSING_FACTOR_C 4
#define BLIS_DEFAULT_FUSING_FACTOR_Z 2
// -- LEVEL-1V KERNEL CONSTANTS ------------------------------------------------
// -- Default register blocksizes for vectors --
// NOTE: Register blocksizes for vectors are used when packing
// non-contiguous vectors. Similar to that of KR, they can
// typically be set to 1.
#define BLIS_DEFAULT_VR_S 1
#define BLIS_DEFAULT_VR_D 1
#define BLIS_DEFAULT_VR_C 1
#define BLIS_DEFAULT_VR_Z 1
// -- LEVEL-3 KERNEL DEFINITIONS -----------------------------------------------
@@ -155,3 +295,6 @@
#define SETV_KERNEL setv_unb_var1
#endif

View File

@@ -38,8 +38,9 @@
// -- OPERATING SYSTEM ---------------------------------------------------------
// Declaration for posix_memalign() needs this.
#define _GNU_SOURCE 1
// Enable IEEE Standard 1003.1-2004 (POSIX.1-2001, 2004 edition).
// NOTE: This is needed to enable posix_memalign().
#define _POSIX_C_SOURCE 200112L
@@ -60,8 +61,10 @@
#define BLIS_MEMORY_ALIGNMENT_BOUNDARY 16
#endif
// Static memory pool size.
#define BLIS_STATIC_MEM_POOL_SIZE (256 * 5000 * sizeof(double))
// The number of MC x KC and KC x NC blocks to reserve in the static memory
// pool.
#define BLIS_NUM_MC_X_KC_BLOCKS 2
#define BLIS_NUM_KC_X_NC_BLOCKS 1
// The page size is used by the memory allocator so that static memory
// can be allocated with alignment to the beginning of a page boundary.
@@ -87,141 +90,4 @@
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
// -- Default cache blocksizes --
// Constraints:
//
// (1) MC must be a multiple of:
// (a) MR (for zero-padding purposes) and
// (b) NR.
// (2) NC must be a multiple of
// (a) NR (for zero-padding purposes) and
// (b) MR.
// (3) KC does not need to be multiple of anything, unless the micro-kernel
// specifically requires it (and typically it does not).
//
// NOTE: For BLIS libraries built on block-panel macro-kernels, constraint
// (2b) is relaxed. In this case, (1b) is needed for operation implementations
// involving matrices with diagonals (trmm, trsm). In these cases, we want the
// diagonal offset of any panel of packed matrix A to be a multiple of MR.
// If, instead, the library were to be built on panel-block macro-kernels,
// matrix B would be the one with structure, not A, and thus it would be
// constraint (2b) that would be needed instead of (1b).
//
#define BLIS_DEFAULT_MC_S 128
#define BLIS_DEFAULT_KC_S 256
#define BLIS_DEFAULT_NC_S 8192
#define BLIS_DEFAULT_MC_D 128
#define BLIS_DEFAULT_KC_D 256
#define BLIS_DEFAULT_NC_D 8192
#define BLIS_DEFAULT_MC_C 128
#define BLIS_DEFAULT_KC_C 256
#define BLIS_DEFAULT_NC_C 8192
#define BLIS_DEFAULT_MC_Z 128
#define BLIS_DEFAULT_KC_Z 256
#define BLIS_DEFAULT_NC_Z 8192
// -- Default register blocksizes for inner kernel --
// NOTE: When using the reference configuration, these register blocksizes
// in the m and n dimensions should all be equal to the size expected by
// the reference micro-kernel(s).
#define BLIS_DEFAULT_MR_S 4
#define BLIS_DEFAULT_NR_S 4
#define BLIS_DEFAULT_MR_D 4
#define BLIS_DEFAULT_NR_D 4
#define BLIS_DEFAULT_MR_C 4
#define BLIS_DEFAULT_NR_C 4
#define BLIS_DEFAULT_MR_Z 4
#define BLIS_DEFAULT_NR_Z 4
// NOTE: If the micro-kernel, which is typically unrolled to a factor
// of f, handles leftover edge cases (ie: when k % f > 0) then these
// register blocksizes in the k dimension can be defined to 1.
#define BLIS_DEFAULT_KR_S 1
#define BLIS_DEFAULT_KR_D 1
#define BLIS_DEFAULT_KR_C 1
#define BLIS_DEFAULT_KR_Z 1
// -- Number of elements per vector register --
// NOTE: These constants are typically only used to determine the amount
// of duplication needed when configuring level-3 macro-kernels that
// copy and duplicate elements of B to a temporary duplication buffer
// (so that element-wise vector multiplication and addition instructions
// can be used).
#define BLIS_NUM_ELEM_PER_REG_S 4
#define BLIS_NUM_ELEM_PER_REG_D 2
#define BLIS_NUM_ELEM_PER_REG_C 2
#define BLIS_NUM_ELEM_PER_REG_Z 1
// -- Default switch for duplication of B --
// NOTE: If BLIS_DEFAULT_DUPLICATE_B is set to FALSE, then the
// NUM_DUPL definitions are not used.
//#define BLIS_DEFAULT_DUPLICATE_B TRUE
#define BLIS_DEFAULT_DUPLICATE_B FALSE
#define BLIS_DEFAULT_NUM_DUPL_S BLIS_NUM_ELEM_PER_REG_S
#define BLIS_DEFAULT_NUM_DUPL_D BLIS_NUM_ELEM_PER_REG_D
#define BLIS_DEFAULT_NUM_DUPL_C BLIS_NUM_ELEM_PER_REG_C
#define BLIS_DEFAULT_NUM_DUPL_Z BLIS_NUM_ELEM_PER_REG_Z
// -- Default incremental packing blocksizes (n dimension) --
// NOTE: These incremental packing blocksizes (for the n dimension) are only
// used by certain blocked variants. But when they *are* used, they MUST be
// an integer multiple of NR!
#define BLIS_DEFAULT_NI_FAC 16
#define BLIS_DEFAULT_NI_S (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_S)
#define BLIS_DEFAULT_NI_D (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_D)
#define BLIS_DEFAULT_NI_C (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_C)
#define BLIS_DEFAULT_NI_Z (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_Z)
// -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------
// -- Default fusing factors for level-1f operations --
// NOTE: Default fusing factors are not used by the reference implementations
// of level-1f operations. They are here only for use when these operations
// are optimized.
#define BLIS_DEFAULT_FUSING_FACTOR_S 8
#define BLIS_DEFAULT_FUSING_FACTOR_D 4
#define BLIS_DEFAULT_FUSING_FACTOR_C 4
#define BLIS_DEFAULT_FUSING_FACTOR_Z 2
// -- LEVEL-1V KERNEL CONSTANTS ------------------------------------------------
// -- Default register blocksizes for vectors --
// NOTE: Register blocksizes for vectors are used when packing
// non-contiguous vectors. Similar to that of KR, they can
// typically be set to 1.
#define BLIS_DEFAULT_VR_S 1
#define BLIS_DEFAULT_VR_D 1
#define BLIS_DEFAULT_VR_C 1
#define BLIS_DEFAULT_VR_Z 1
#endif

View File

@@ -32,6 +32,145 @@
*/
#ifndef BLIS_KERNEL_H
#define BLIS_KERNEL_H
// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
// -- Default cache blocksizes --
// Constraints:
//
// (1) MC must be a multiple of:
// (a) MR (for zero-padding purposes) and
// (b) NR.
// (2) NC must be a multiple of
// (a) NR (for zero-padding purposes) and
// (b) MR.
// (3) KC does not need to be multiple of anything, unless the micro-kernel
// specifically requires it (and typically it does not).
//
// NOTE: For BLIS libraries built on block-panel macro-kernels, constraint
// (2b) is relaxed. In this case, (1b) is needed for operation implementations
// involving matrices with diagonals (trmm, trsm). In these cases, we want the
// diagonal offset of any panel of packed matrix A to be a multiple of MR.
// If, instead, the library were to be built on panel-block macro-kernels,
// matrix B would be the one with structure, not A, and thus it would be
// constraint (2b) that would be needed instead of (1b).
//
#define BLIS_DEFAULT_MC_S 128
#define BLIS_DEFAULT_KC_S 256
#define BLIS_DEFAULT_NC_S 8192
#define BLIS_DEFAULT_MC_D 128
#define BLIS_DEFAULT_KC_D 256
#define BLIS_DEFAULT_NC_D 8192
#define BLIS_DEFAULT_MC_C 128
#define BLIS_DEFAULT_KC_C 256
#define BLIS_DEFAULT_NC_C 8192
#define BLIS_DEFAULT_MC_Z 128
#define BLIS_DEFAULT_KC_Z 256
#define BLIS_DEFAULT_NC_Z 8192
// -- Default register blocksizes for inner kernel --
// NOTE: When using the reference configuration, these register blocksizes
// in the m and n dimensions should all be equal to the size expected by
// the reference micro-kernel(s).
#define BLIS_DEFAULT_MR_S 4
#define BLIS_DEFAULT_NR_S 4
#define BLIS_DEFAULT_MR_D 4
#define BLIS_DEFAULT_NR_D 4
#define BLIS_DEFAULT_MR_C 4
#define BLIS_DEFAULT_NR_C 4
#define BLIS_DEFAULT_MR_Z 4
#define BLIS_DEFAULT_NR_Z 4
// NOTE: If the micro-kernel, which is typically unrolled to a factor
// of f, handles leftover edge cases (ie: when k % f > 0) then these
// register blocksizes in the k dimension can be defined to 1.
#define BLIS_DEFAULT_KR_S 1
#define BLIS_DEFAULT_KR_D 1
#define BLIS_DEFAULT_KR_C 1
#define BLIS_DEFAULT_KR_Z 1
// -- Number of elements per vector register --
// NOTE: These constants are typically only used to determine the amount
// of duplication needed when configuring level-3 macro-kernels that
// copy and duplicate elements of B to a temporary duplication buffer
// (so that element-wise vector multiplication and addition instructions
// can be used).
#define BLIS_NUM_ELEM_PER_REG_S 4
#define BLIS_NUM_ELEM_PER_REG_D 2
#define BLIS_NUM_ELEM_PER_REG_C 2
#define BLIS_NUM_ELEM_PER_REG_Z 1
// -- Default switch for duplication of B --
// NOTE: If BLIS_DEFAULT_DUPLICATE_B is set to FALSE, then the
// NUM_DUPL definitions are not used.
//#define BLIS_DEFAULT_DUPLICATE_B TRUE
#define BLIS_DEFAULT_DUPLICATE_B FALSE
#define BLIS_DEFAULT_NUM_DUPL_S BLIS_NUM_ELEM_PER_REG_S
#define BLIS_DEFAULT_NUM_DUPL_D BLIS_NUM_ELEM_PER_REG_D
#define BLIS_DEFAULT_NUM_DUPL_C BLIS_NUM_ELEM_PER_REG_C
#define BLIS_DEFAULT_NUM_DUPL_Z BLIS_NUM_ELEM_PER_REG_Z
// -- Default incremental packing blocksizes (n dimension) --
// NOTE: These incremental packing blocksizes (for the n dimension) are only
// used by certain blocked variants. But when they *are* used, they MUST be
// an integer multiple of NR!
#define BLIS_DEFAULT_NI_FAC 16
#define BLIS_DEFAULT_NI_S (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_S)
#define BLIS_DEFAULT_NI_D (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_D)
#define BLIS_DEFAULT_NI_C (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_C)
#define BLIS_DEFAULT_NI_Z (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_Z)
// -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------
// -- Default fusing factors for level-1f operations --
// NOTE: Default fusing factors are not used by the reference implementations
// of level-1f operations. They are here only for use when these operations
// are optimized.
#define BLIS_DEFAULT_FUSING_FACTOR_S 8
#define BLIS_DEFAULT_FUSING_FACTOR_D 4
#define BLIS_DEFAULT_FUSING_FACTOR_C 4
#define BLIS_DEFAULT_FUSING_FACTOR_Z 2
// -- LEVEL-1V KERNEL CONSTANTS ------------------------------------------------
// -- Default register blocksizes for vectors --
// NOTE: Register blocksizes for vectors are used when packing
// non-contiguous vectors. Similar to that of KR, they can
// typically be set to 1.
#define BLIS_DEFAULT_VR_S 1
#define BLIS_DEFAULT_VR_D 1
#define BLIS_DEFAULT_VR_C 1
#define BLIS_DEFAULT_VR_Z 1
// -- LEVEL-3 KERNEL DEFINITIONS -----------------------------------------------
@@ -142,3 +281,6 @@
#define SETV_KERNEL setv_unb_var1
#endif

View File

@@ -537,3 +537,7 @@ err_t bl2_check_packv_schema_on_unpack( obj_t* a )
return e_val;
}
// -- Memory allocator checks --------------------------------------------------

View File

@@ -162,5 +162,8 @@ void bl2_error_msgs_init( void )
sprintf( bl2_error_string_for_code(BLIS_PACK_SCHEMA_NOT_SUPPORTED_FOR_UNPACK),
"Pack schema not yet supported/implemented for use with unpacking." );
sprintf( bl2_error_string_for_code(BLIS_EXHAUSTED_STATIC_MEMORY_POOL),
"Attempted to allocate more memory from static pool than is available." );
}

View File

@@ -34,11 +34,29 @@
#include "blis2.h"
#define N_ELEM_SMEM ( BLIS_STATIC_MEM_POOL_SIZE / sizeof( double ) )
// Static memory pool size (in units of doubles).
// The total is the sum of three terms:
//   (1) BLIS_NUM_MC_X_KC_BLOCKS blocks, each holding
//       BLIS_DEFAULT_MC_D * BLIS_DEFAULT_KC_D doubles;
//   (2) BLIS_NUM_KC_X_NC_BLOCKS blocks, each holding
//       BLIS_DEFAULT_KC_D * BLIS_DEFAULT_NC_D doubles;
//   (3) two regions of BLIS_MAX_PREFETCH_BYTE_OFFSET bytes, converted to a
//       count of doubles (presumably slack so that prefetches may run past
//       the end of a block -- TODO confirm).
// NOTE: Only the double-precision (_D) blocksizes enter this computation.
#define BLIS_NUM_ELEM_SMEM ( \
BLIS_NUM_MC_X_KC_BLOCKS * \
( BLIS_DEFAULT_MC_D * \
BLIS_DEFAULT_KC_D \
) + \
BLIS_NUM_KC_X_NC_BLOCKS * \
( BLIS_DEFAULT_KC_D * \
BLIS_DEFAULT_NC_D \
) + \
2 * \
( BLIS_MAX_PREFETCH_BYTE_OFFSET / \
sizeof(double) \
) \
)
double smem[ N_ELEM_SMEM ];
// Static memory pool.
double smem[ BLIS_NUM_ELEM_SMEM ];
// Pointer to current "stack" location in the memory pool.
double* mc = smem;
// A counter that keeps track of how many chunks have been allocated.
int counter = 0;
@@ -110,8 +128,8 @@ void* bl2_malloc_s( siz_t buf_size )
rmem = ( void* )mc;
mc += ( buf_size / sizeof( double ) );
if ( mc >= smem + ( N_ELEM_SMEM ) )
bl2_abort();
if ( mc >= smem + BLIS_NUM_ELEM_SMEM )
bl2_check_error_code( BLIS_EXHAUSTED_STATIC_MEMORY_POOL );
++counter;
@@ -128,10 +146,9 @@ void bl2_free_s( void* p )
void bl2_mm_clear_smem( void )
{
dim_t n = N_ELEM_SMEM;
dim_t i;
for ( i = 0; i < n; ++i )
for ( i = 0; i < BLIS_NUM_ELEM_SMEM; ++i )
{
smem[i] = 0.0;
}

View File

@@ -38,7 +38,7 @@
// -- Error-related macros --
// Used to determine the size of the array of error strings.
#define BLIS_MAX_NUM_ERR_MSGS 100
#define BLIS_MAX_NUM_ERR_MSGS 200
#define BLIS_MAX_ERR_MSG_LENGTH 200
// Used to insert filenames and line numbers into error-checking code.

View File

@@ -461,7 +461,10 @@ typedef enum
// Packing-specific errors
BLIS_PACK_SCHEMA_NOT_SUPPORTED_FOR_UNPACK = ( -90),
BLIS_ERROR_CODE_MAX = (-100)
// Memory allocator errors
BLIS_EXHAUSTED_STATIC_MEMORY_POOL = (-100),
BLIS_ERROR_CODE_MAX = (-110)
} err_t;
#endif

View File

@@ -1 +1 @@
0.0.1-4
0.0.1-5