More tweaks to _config.h, _kernel.h; smem tweaks.

Details: - Moved kernel-related definitions from bl2_config.h to bl2_kernel.h. - Replaced #define of _GNU_SOURCE with #define of _POSIX_C_SOURCE. This accomplishes the same thing (enabling posix_memalign()) without enabling all of the GNU extensions we don't need. - Defined the size of the static memory pool in terms of MC, KC, and NC, as well as two new constants that determine how many MCxKC blocks and how many KCxNC blocks should be allocated (defined in bl2_config.h). - In the case of static memory pool exhaustion, replaced the generic bl2_abort() with a specific error code call.
2026-04-19 23:28:52 +00:00 · 2012-12-18 14:34:02 -06:00
parent 5d8bdb21c4
commit 6fbbdd4e19
10 changed files with 335 additions and 291 deletions
--- a/config/clarksville/bl2_config.h
+++ b/config/clarksville/bl2_config.h
@@ -38,8 +38,9 @@

 // -- OPERATING SYSTEM ---------------------------------------------------------

-// Declaration for posix_memalign() needs this.
-#define _GNU_SOURCE 1
+// Enable IEEE Standard 1003.1-2001 (POSIX.1-2001).
+// NOTE: This is needed to enable posix_memalign().
+#define _POSIX_C_SOURCE 200112L



@@ -60,8 +61,10 @@
  #define BLIS_MEMORY_ALIGNMENT_BOUNDARY 16
 #endif

-// Static memory pool size.
-#define BLIS_STATIC_MEM_POOL_SIZE        (256 * 5000 * sizeof(double))
+// The number of MC x KC and KC x NC blocks to reserve in the static memory
+// pool.
+#define BLIS_NUM_MC_X_KC_BLOCKS          2
+#define BLIS_NUM_KC_X_NC_BLOCKS          1

 // The page size is used by the memory allocator so that static memory
 // can be allocated with alignment to the beginning of a page boundary.
@@ -87,141 +90,4 @@



-// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
-
-// -- Default cache blocksizes --
-
-// Constraints:
-//
-// (1) MC must be a multiple of:
-//     (a) MR (for zero-padding purposes) and
-//     (b) NR.
-// (2) NC must be a multiple of
-//     (a) NR (for zero-padding purposes) and
-//     (b) MR.
-// (3) KC does not need to be multiple of anything, unless the micro-kernel
-//     specifically requires it (and typically it does not).
-// 
-// NOTE: For BLIS libraries built on block-panel macro-kernels, constraint
-// (2b) is relaxed. In this case, (1b) is needed for operation implementations
-// involving matrices with diagonals (trmm, trsm). In these cases, we want the
-// diagonal offset of any panel of packed matrix A to have a diagonal offset
-// that is a multiple of MR. If, instead, the library were to be built on
-// block-panel macro-kernels, matrix B would be the one with structure, not A,
-// and thus it would be constraint (2b) that would be needed instead of (1b).
-//
-
-#define BLIS_DEFAULT_MC_S              128
-#define BLIS_DEFAULT_KC_S              256
-#define BLIS_DEFAULT_NC_S              8192
-
-#define BLIS_DEFAULT_MC_D              368
-#define BLIS_DEFAULT_KC_D              256
-#define BLIS_DEFAULT_NC_D              8192
-
-#define BLIS_DEFAULT_MC_C              128
-#define BLIS_DEFAULT_KC_C              256
-#define BLIS_DEFAULT_NC_C              8192
-
-#define BLIS_DEFAULT_MC_Z              128
-#define BLIS_DEFAULT_KC_Z              256
-#define BLIS_DEFAULT_NC_Z              8192
-
-// -- Default register blocksizes for inner kernel --
-
-// NOTE: When using the reference configuration, these register blocksizes
-// in the m and n dimensions should all be equal to the size expected by
-// the reference micro-kernel(s).
-
-#define BLIS_DEFAULT_MR_S              4
-#define BLIS_DEFAULT_NR_S              4
-
-#define BLIS_DEFAULT_MR_D              4
-#define BLIS_DEFAULT_NR_D              4
-
-#define BLIS_DEFAULT_MR_C              4
-#define BLIS_DEFAULT_NR_C              4
-
-#define BLIS_DEFAULT_MR_Z              4
-#define BLIS_DEFAULT_NR_Z              4
-
-// NOTE: If the micro-kernel, which is typically unrolled to a factor
-// of f, handles leftover edge cases (ie: when k % f > 0) then these
-// register blocksizes in the k dimension can be defined to 1.
-
-#define BLIS_DEFAULT_KR_S              1
-#define BLIS_DEFAULT_KR_D              1
-#define BLIS_DEFAULT_KR_C              1
-#define BLIS_DEFAULT_KR_Z              1
-
-// -- Number of elements per vector register --
-
-// NOTE: These constants are typically only used to determine the amount
-// of duplication needed when configuring level-3 macro-kernels that
-// copy and duplicate elements of B to a temporary duplication buffer
-// (so that element-wise vector multiplication and addition instructions
-// can be used).
-
-#define BLIS_NUM_ELEM_PER_REG_S        4
-#define BLIS_NUM_ELEM_PER_REG_D        2
-#define BLIS_NUM_ELEM_PER_REG_C        2
-#define BLIS_NUM_ELEM_PER_REG_Z        1
-
-// -- Default switch for duplication of B --
-
-// NOTE: If BLIS_DEFAULT_DUPLICATE_B is set to FALSE, then the
-// NUM_DUPL definitions are not used.
-
-//#define BLIS_DEFAULT_DUPLICATE_B       TRUE
-#define BLIS_DEFAULT_DUPLICATE_B       FALSE
-#define BLIS_DEFAULT_NUM_DUPL_S        BLIS_NUM_ELEM_PER_REG_S
-#define BLIS_DEFAULT_NUM_DUPL_D        BLIS_NUM_ELEM_PER_REG_D
-#define BLIS_DEFAULT_NUM_DUPL_C        BLIS_NUM_ELEM_PER_REG_C
-#define BLIS_DEFAULT_NUM_DUPL_Z        BLIS_NUM_ELEM_PER_REG_Z
-
-// -- Default incremental packing blocksizes (n dimension) --
-
-// NOTE: These incremental packing blocksizes (for the n dimension) are only
-// used by certain blocked variants. But when the *are* used, they MUST be
-// be an integer multiple of NR!
-
-#define BLIS_DEFAULT_NI_FAC            16
-#define BLIS_DEFAULT_NI_S              (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_S)
-#define BLIS_DEFAULT_NI_D              (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_D)
-#define BLIS_DEFAULT_NI_C              (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_C)
-#define BLIS_DEFAULT_NI_Z              (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_Z)
-
-
-
-// -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------
-
-// -- Default fusing factors for level-1f operations --
-
-// NOTE: Default fusing factors are not used by the reference implementations
-// of level-1f operations. They are here only for use when these operations
-// are optimized.
-
-#define BLIS_DEFAULT_FUSING_FACTOR_S   8
-#define BLIS_DEFAULT_FUSING_FACTOR_D   4
-#define BLIS_DEFAULT_FUSING_FACTOR_C   4
-#define BLIS_DEFAULT_FUSING_FACTOR_Z   2
-
-
-
-// -- LEVEL-1V KERNEL CONSTANTS ------------------------------------------------
-
-// -- Default register blocksizes for vectors --
-
-// NOTE: Register blocksizes for vectors are used when packing
-// non-contiguous vectors. Similar to that of KR, they can
-// typically be set to 1.
-
-#define BLIS_DEFAULT_VR_S              1
-#define BLIS_DEFAULT_VR_D              1
-#define BLIS_DEFAULT_VR_C              1
-#define BLIS_DEFAULT_VR_Z              1
-
-
-
-
 #endif
--- a/config/clarksville/bl2_kernel.h
+++ b/config/clarksville/bl2_kernel.h
@@ -32,6 +32,146 @@

 */

+#ifndef BLIS_KERNEL_H
+#define BLIS_KERNEL_H
+
+
+// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
+
+// -- Default cache blocksizes --
+
+// Constraints:
+//
+// (1) MC must be a multiple of:
+//     (a) MR (for zero-padding purposes) and
+//     (b) NR.
+// (2) NC must be a multiple of
+//     (a) NR (for zero-padding purposes) and
+//     (b) MR.
+// (3) KC does not need to be multiple of anything, unless the micro-kernel
+//     specifically requires it (and typically it does not).
+// 
+// NOTE: For BLIS libraries built on block-panel macro-kernels, constraint
+// (2b) is relaxed. In this case, (1b) is needed for operation implementations
+// involving matrices with diagonals (trmm, trsm). In these cases, we want the
+// diagonal offset of any panel of packed matrix A to have a diagonal offset
+// that is a multiple of MR. If, instead, the library were to be built on
+// panel-block macro-kernels, matrix B would be the one with structure, not A,
+// and thus it would be constraint (2b) that would be needed instead of (1b).
+//
+
+#define BLIS_DEFAULT_MC_S              128
+#define BLIS_DEFAULT_KC_S              256
+#define BLIS_DEFAULT_NC_S              8192
+
+#define BLIS_DEFAULT_MC_D              368
+#define BLIS_DEFAULT_KC_D              256
+#define BLIS_DEFAULT_NC_D              8192
+
+#define BLIS_DEFAULT_MC_C              128
+#define BLIS_DEFAULT_KC_C              256
+#define BLIS_DEFAULT_NC_C              8192
+
+#define BLIS_DEFAULT_MC_Z              128
+#define BLIS_DEFAULT_KC_Z              256
+#define BLIS_DEFAULT_NC_Z              8192
+
+// -- Default register blocksizes for inner kernel --
+
+// NOTE: When using the reference configuration, these register blocksizes
+// in the m and n dimensions should all be equal to the size expected by
+// the reference micro-kernel(s).
+
+#define BLIS_DEFAULT_MR_S              4
+#define BLIS_DEFAULT_NR_S              4
+
+#define BLIS_DEFAULT_MR_D              4
+#define BLIS_DEFAULT_NR_D              4
+
+#define BLIS_DEFAULT_MR_C              4
+#define BLIS_DEFAULT_NR_C              4
+
+#define BLIS_DEFAULT_MR_Z              4
+#define BLIS_DEFAULT_NR_Z              4
+
+// NOTE: If the micro-kernel, which is typically unrolled to a factor
+// of f, handles leftover edge cases (ie: when k % f > 0) then these
+// register blocksizes in the k dimension can be defined to 1.
+
+#define BLIS_DEFAULT_KR_S              1
+#define BLIS_DEFAULT_KR_D              1
+#define BLIS_DEFAULT_KR_C              1
+#define BLIS_DEFAULT_KR_Z              1
+
+// -- Number of elements per vector register --
+
+// NOTE: These constants are typically only used to determine the amount
+// of duplication needed when configuring level-3 macro-kernels that
+// copy and duplicate elements of B to a temporary duplication buffer
+// (so that element-wise vector multiplication and addition instructions
+// can be used).
+
+#define BLIS_NUM_ELEM_PER_REG_S        4
+#define BLIS_NUM_ELEM_PER_REG_D        2
+#define BLIS_NUM_ELEM_PER_REG_C        2
+#define BLIS_NUM_ELEM_PER_REG_Z        1
+
+// -- Default switch for duplication of B --
+
+// NOTE: If BLIS_DEFAULT_DUPLICATE_B is set to FALSE, then the
+// NUM_DUPL definitions are not used.
+
+//#define BLIS_DEFAULT_DUPLICATE_B       TRUE
+#define BLIS_DEFAULT_DUPLICATE_B       FALSE
+#define BLIS_DEFAULT_NUM_DUPL_S        BLIS_NUM_ELEM_PER_REG_S
+#define BLIS_DEFAULT_NUM_DUPL_D        BLIS_NUM_ELEM_PER_REG_D
+#define BLIS_DEFAULT_NUM_DUPL_C        BLIS_NUM_ELEM_PER_REG_C
+#define BLIS_DEFAULT_NUM_DUPL_Z        BLIS_NUM_ELEM_PER_REG_Z
+
+// -- Default incremental packing blocksizes (n dimension) --
+
+// NOTE: These incremental packing blocksizes (for the n dimension) are only
+// used by certain blocked variants. But when they *are* used, they MUST be
+// an integer multiple of NR!
+
+#define BLIS_DEFAULT_NI_FAC            16
+#define BLIS_DEFAULT_NI_S              (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_S)
+#define BLIS_DEFAULT_NI_D              (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_D)
+#define BLIS_DEFAULT_NI_C              (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_C)
+#define BLIS_DEFAULT_NI_Z              (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_Z)
+
+
+
+// -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------
+
+// -- Default fusing factors for level-1f operations --
+
+// NOTE: Default fusing factors are not used by the reference implementations
+// of level-1f operations. They are here only for use when these operations
+// are optimized.
+
+#define BLIS_DEFAULT_FUSING_FACTOR_S   8
+#define BLIS_DEFAULT_FUSING_FACTOR_D   4
+#define BLIS_DEFAULT_FUSING_FACTOR_C   4
+#define BLIS_DEFAULT_FUSING_FACTOR_Z   2
+
+
+
+// -- LEVEL-1V KERNEL CONSTANTS ------------------------------------------------
+
+// -- Default register blocksizes for vectors --
+
+// NOTE: Register blocksizes for vectors are used when packing
+// non-contiguous vectors. Similar to that of KR, they can
+// typically be set to 1.
+
+#define BLIS_DEFAULT_VR_S              1
+#define BLIS_DEFAULT_VR_D              1
+#define BLIS_DEFAULT_VR_C              1
+#define BLIS_DEFAULT_VR_Z              1
+
+
+

 // -- LEVEL-3 KERNEL DEFINITIONS -----------------------------------------------

@@ -155,3 +295,6 @@
 #define SETV_KERNEL          setv_unb_var1


+
+#endif
+
--- a/config/reference/bl2_config.h
+++ b/config/reference/bl2_config.h
@@ -38,8 +38,9 @@

 // -- OPERATING SYSTEM ---------------------------------------------------------

-// Declaration for posix_memalign() needs this.
-#define _GNU_SOURCE 1
+// Enable IEEE Standard 1003.1-2001 (POSIX.1-2001).
+// NOTE: This is needed to enable posix_memalign().
+#define _POSIX_C_SOURCE 200112L



@@ -60,8 +61,10 @@
  #define BLIS_MEMORY_ALIGNMENT_BOUNDARY 16
 #endif

-// Static memory pool size.
-#define BLIS_STATIC_MEM_POOL_SIZE        (256 * 5000 * sizeof(double))
+// The number of MC x KC and KC x NC blocks to reserve in the static memory
+// pool.
+#define BLIS_NUM_MC_X_KC_BLOCKS          2
+#define BLIS_NUM_KC_X_NC_BLOCKS          1

 // The page size is used by the memory allocator so that static memory
 // can be allocated with alignment to the beginning of a page boundary.
@@ -87,141 +90,4 @@



-// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
-
-// -- Default cache blocksizes --
-
-// Constraints:
-//
-// (1) MC must be a multiple of:
-//     (a) MR (for zero-padding purposes) and
-//     (b) NR.
-// (2) NC must be a multiple of
-//     (a) NR (for zero-padding purposes) and
-//     (b) MR.
-// (3) KC does not need to be multiple of anything, unless the micro-kernel
-//     specifically requires it (and typically it does not).
-// 
-// NOTE: For BLIS libraries built on block-panel macro-kernels, constraint
-// (2b) is relaxed. In this case, (1b) is needed for operation implementations
-// involving matrices with diagonals (trmm, trsm). In these cases, we want the
-// diagonal offset of any panel of packed matrix A to have a diagonal offset
-// that is a multiple of MR. If, instead, the library were to be built on
-// block-panel macro-kernels, matrix B would be the one with structure, not A,
-// and thus it would be constraint (2b) that would be needed instead of (1b).
-//
-
-#define BLIS_DEFAULT_MC_S              128
-#define BLIS_DEFAULT_KC_S              256
-#define BLIS_DEFAULT_NC_S              8192
-
-#define BLIS_DEFAULT_MC_D              128
-#define BLIS_DEFAULT_KC_D              256
-#define BLIS_DEFAULT_NC_D              8192
-
-#define BLIS_DEFAULT_MC_C              128
-#define BLIS_DEFAULT_KC_C              256
-#define BLIS_DEFAULT_NC_C              8192
-
-#define BLIS_DEFAULT_MC_Z              128
-#define BLIS_DEFAULT_KC_Z              256
-#define BLIS_DEFAULT_NC_Z              8192
-
-// -- Default register blocksizes for inner kernel --
-
-// NOTE: When using the reference configuration, these register blocksizes
-// in the m and n dimensions should all be equal to the size expected by
-// the reference micro-kernel(s).
-
-#define BLIS_DEFAULT_MR_S              4
-#define BLIS_DEFAULT_NR_S              4
-
-#define BLIS_DEFAULT_MR_D              4
-#define BLIS_DEFAULT_NR_D              4
-
-#define BLIS_DEFAULT_MR_C              4
-#define BLIS_DEFAULT_NR_C              4
-
-#define BLIS_DEFAULT_MR_Z              4
-#define BLIS_DEFAULT_NR_Z              4
-
-// NOTE: If the micro-kernel, which is typically unrolled to a factor
-// of f, handles leftover edge cases (ie: when k % f > 0) then these
-// register blocksizes in the k dimension can be defined to 1.
-
-#define BLIS_DEFAULT_KR_S              1
-#define BLIS_DEFAULT_KR_D              1
-#define BLIS_DEFAULT_KR_C              1
-#define BLIS_DEFAULT_KR_Z              1
-
-// -- Number of elements per vector register --
-
-// NOTE: These constants are typically only used to determine the amount
-// of duplication needed when configuring level-3 macro-kernels that
-// copy and duplicate elements of B to a temporary duplication buffer
-// (so that element-wise vector multiplication and addition instructions
-// can be used).
-
-#define BLIS_NUM_ELEM_PER_REG_S        4
-#define BLIS_NUM_ELEM_PER_REG_D        2
-#define BLIS_NUM_ELEM_PER_REG_C        2
-#define BLIS_NUM_ELEM_PER_REG_Z        1
-
-// -- Default switch for duplication of B --
-
-// NOTE: If BLIS_DEFAULT_DUPLICATE_B is set to FALSE, then the
-// NUM_DUPL definitions are not used.
-
-//#define BLIS_DEFAULT_DUPLICATE_B       TRUE
-#define BLIS_DEFAULT_DUPLICATE_B       FALSE
-#define BLIS_DEFAULT_NUM_DUPL_S        BLIS_NUM_ELEM_PER_REG_S
-#define BLIS_DEFAULT_NUM_DUPL_D        BLIS_NUM_ELEM_PER_REG_D
-#define BLIS_DEFAULT_NUM_DUPL_C        BLIS_NUM_ELEM_PER_REG_C
-#define BLIS_DEFAULT_NUM_DUPL_Z        BLIS_NUM_ELEM_PER_REG_Z
-
-// -- Default incremental packing blocksizes (n dimension) --
-
-// NOTE: These incremental packing blocksizes (for the n dimension) are only
-// used by certain blocked variants. But when the *are* used, they MUST be
-// be an integer multiple of NR!
-
-#define BLIS_DEFAULT_NI_FAC            16
-#define BLIS_DEFAULT_NI_S              (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_S)
-#define BLIS_DEFAULT_NI_D              (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_D)
-#define BLIS_DEFAULT_NI_C              (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_C)
-#define BLIS_DEFAULT_NI_Z              (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_Z)
-
-
-
-// -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------
-
-// -- Default fusing factors for level-1f operations --
-
-// NOTE: Default fusing factors are not used by the reference implementations
-// of level-1f operations. They are here only for use when these operations
-// are optimized.
-
-#define BLIS_DEFAULT_FUSING_FACTOR_S   8
-#define BLIS_DEFAULT_FUSING_FACTOR_D   4
-#define BLIS_DEFAULT_FUSING_FACTOR_C   4
-#define BLIS_DEFAULT_FUSING_FACTOR_Z   2
-
-
-
-// -- LEVEL-1V KERNEL CONSTANTS ------------------------------------------------
-
-// -- Default register blocksizes for vectors --
-
-// NOTE: Register blocksizes for vectors are used when packing
-// non-contiguous vectors. Similar to that of KR, they can
-// typically be set to 1.
-
-#define BLIS_DEFAULT_VR_S              1
-#define BLIS_DEFAULT_VR_D              1
-#define BLIS_DEFAULT_VR_C              1
-#define BLIS_DEFAULT_VR_Z              1
-
-
-
-
 #endif
--- a/config/reference/bl2_kernel.h
+++ b/config/reference/bl2_kernel.h
@@ -32,6 +32,145 @@

 */

+#ifndef BLIS_KERNEL_H
+#define BLIS_KERNEL_H
+
+
+// -- LEVEL-3 MICRO-KERNEL CONSTANTS -------------------------------------------
+
+// -- Default cache blocksizes --
+
+// Constraints:
+//
+// (1) MC must be a multiple of:
+//     (a) MR (for zero-padding purposes) and
+//     (b) NR.
+// (2) NC must be a multiple of
+//     (a) NR (for zero-padding purposes) and
+//     (b) MR.
+// (3) KC does not need to be multiple of anything, unless the micro-kernel
+//     specifically requires it (and typically it does not).
+// 
+// NOTE: For BLIS libraries built on block-panel macro-kernels, constraint
+// (2b) is relaxed. In this case, (1b) is needed for operation implementations
+// involving matrices with diagonals (trmm, trsm). In these cases, we want the
+// diagonal offset of any panel of packed matrix A to have a diagonal offset
+// that is a multiple of MR. If, instead, the library were to be built on
+// panel-block macro-kernels, matrix B would be the one with structure, not A,
+// and thus it would be constraint (2b) that would be needed instead of (1b).
+//
+
+#define BLIS_DEFAULT_MC_S              128
+#define BLIS_DEFAULT_KC_S              256
+#define BLIS_DEFAULT_NC_S              8192
+
+#define BLIS_DEFAULT_MC_D              128
+#define BLIS_DEFAULT_KC_D              256
+#define BLIS_DEFAULT_NC_D              8192
+
+#define BLIS_DEFAULT_MC_C              128
+#define BLIS_DEFAULT_KC_C              256
+#define BLIS_DEFAULT_NC_C              8192
+
+#define BLIS_DEFAULT_MC_Z              128
+#define BLIS_DEFAULT_KC_Z              256
+#define BLIS_DEFAULT_NC_Z              8192
+
+// -- Default register blocksizes for inner kernel --
+
+// NOTE: When using the reference configuration, these register blocksizes
+// in the m and n dimensions should all be equal to the size expected by
+// the reference micro-kernel(s).
+
+#define BLIS_DEFAULT_MR_S              4
+#define BLIS_DEFAULT_NR_S              4
+
+#define BLIS_DEFAULT_MR_D              4
+#define BLIS_DEFAULT_NR_D              4
+
+#define BLIS_DEFAULT_MR_C              4
+#define BLIS_DEFAULT_NR_C              4
+
+#define BLIS_DEFAULT_MR_Z              4
+#define BLIS_DEFAULT_NR_Z              4
+
+// NOTE: If the micro-kernel, which is typically unrolled to a factor
+// of f, handles leftover edge cases (ie: when k % f > 0) then these
+// register blocksizes in the k dimension can be defined to 1.
+
+#define BLIS_DEFAULT_KR_S              1
+#define BLIS_DEFAULT_KR_D              1
+#define BLIS_DEFAULT_KR_C              1
+#define BLIS_DEFAULT_KR_Z              1
+
+// -- Number of elements per vector register --
+
+// NOTE: These constants are typically only used to determine the amount
+// of duplication needed when configuring level-3 macro-kernels that
+// copy and duplicate elements of B to a temporary duplication buffer
+// (so that element-wise vector multiplication and addition instructions
+// can be used).
+
+#define BLIS_NUM_ELEM_PER_REG_S        4
+#define BLIS_NUM_ELEM_PER_REG_D        2
+#define BLIS_NUM_ELEM_PER_REG_C        2
+#define BLIS_NUM_ELEM_PER_REG_Z        1
+
+// -- Default switch for duplication of B --
+
+// NOTE: If BLIS_DEFAULT_DUPLICATE_B is set to FALSE, then the
+// NUM_DUPL definitions are not used.
+
+//#define BLIS_DEFAULT_DUPLICATE_B       TRUE
+#define BLIS_DEFAULT_DUPLICATE_B       FALSE
+#define BLIS_DEFAULT_NUM_DUPL_S        BLIS_NUM_ELEM_PER_REG_S
+#define BLIS_DEFAULT_NUM_DUPL_D        BLIS_NUM_ELEM_PER_REG_D
+#define BLIS_DEFAULT_NUM_DUPL_C        BLIS_NUM_ELEM_PER_REG_C
+#define BLIS_DEFAULT_NUM_DUPL_Z        BLIS_NUM_ELEM_PER_REG_Z
+
+// -- Default incremental packing blocksizes (n dimension) --
+
+// NOTE: These incremental packing blocksizes (for the n dimension) are only
+// used by certain blocked variants. But when they *are* used, they MUST be
+// an integer multiple of NR!
+
+#define BLIS_DEFAULT_NI_FAC            16
+#define BLIS_DEFAULT_NI_S              (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_S)
+#define BLIS_DEFAULT_NI_D              (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_D)
+#define BLIS_DEFAULT_NI_C              (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_C)
+#define BLIS_DEFAULT_NI_Z              (BLIS_DEFAULT_NI_FAC * BLIS_DEFAULT_NR_Z)
+
+
+
+// -- LEVEL-1F KERNEL CONSTANTS ------------------------------------------------
+
+// -- Default fusing factors for level-1f operations --
+
+// NOTE: Default fusing factors are not used by the reference implementations
+// of level-1f operations. They are here only for use when these operations
+// are optimized.
+
+#define BLIS_DEFAULT_FUSING_FACTOR_S   8
+#define BLIS_DEFAULT_FUSING_FACTOR_D   4
+#define BLIS_DEFAULT_FUSING_FACTOR_C   4
+#define BLIS_DEFAULT_FUSING_FACTOR_Z   2
+
+
+
+// -- LEVEL-1V KERNEL CONSTANTS ------------------------------------------------
+
+// -- Default register blocksizes for vectors --
+
+// NOTE: Register blocksizes for vectors are used when packing
+// non-contiguous vectors. Similar to that of KR, they can
+// typically be set to 1.
+
+#define BLIS_DEFAULT_VR_S              1
+#define BLIS_DEFAULT_VR_D              1
+#define BLIS_DEFAULT_VR_C              1
+#define BLIS_DEFAULT_VR_Z              1
+
+

 // -- LEVEL-3 KERNEL DEFINITIONS -----------------------------------------------

@@ -142,3 +281,6 @@
 #define SETV_KERNEL          setv_unb_var1


+
+#endif
+
--- a/frame/base/bl2_check.c
+++ b/frame/base/bl2_check.c
@@ -537,3 +537,7 @@ err_t bl2_check_packv_schema_on_unpack( obj_t* a )
 	return e_val;
 }

+
+// -- Memory allocator checks --------------------------------------------------
+
+
--- a/frame/base/bl2_error.c
+++ b/frame/base/bl2_error.c
@@ -162,5 +162,8 @@ void bl2_error_msgs_init( void )

 	sprintf( bl2_error_string_for_code(BLIS_PACK_SCHEMA_NOT_SUPPORTED_FOR_UNPACK),
 	         "Pack schema not yet supported/implemented for use with unpacking." );
+
+	sprintf( bl2_error_string_for_code(BLIS_EXHAUSTED_STATIC_MEMORY_POOL),
+	         "Attempted to allocate more memory from static pool than is available." );
 }

--- a/frame/base/bl2_mem.c
+++ b/frame/base/bl2_mem.c
@@ -34,11 +34,29 @@

 #include "blis2.h"

-#define N_ELEM_SMEM ( BLIS_STATIC_MEM_POOL_SIZE / sizeof( double ) )
+// Static memory pool size (in units of doubles).
+#define BLIS_NUM_ELEM_SMEM   ( \
+                               BLIS_NUM_MC_X_KC_BLOCKS * \
+                               ( BLIS_DEFAULT_MC_D * \
+                                 BLIS_DEFAULT_KC_D   \
+                               ) + \
+                               BLIS_NUM_KC_X_NC_BLOCKS * \
+                               ( BLIS_DEFAULT_KC_D * \
+                                 BLIS_DEFAULT_NC_D   \
+                               ) + \
+                               2 * \
+                               ( BLIS_MAX_PREFETCH_BYTE_OFFSET / \
+                                 sizeof(double) \
+                               ) \
+                             )

-double  smem[ N_ELEM_SMEM ];
+// Static memory pool.
+double  smem[ BLIS_NUM_ELEM_SMEM ];

+// Pointer to current "stack" location in the memory pool.
 double* mc      = smem;
+
+// A counter that keeps track of how many chunks have been allocated.
 int     counter = 0;


@@ -110,8 +128,8 @@ void* bl2_malloc_s( siz_t buf_size )
 	rmem = ( void* )mc;
 	mc += ( buf_size / sizeof( double ) );

-	if ( mc >= smem + ( N_ELEM_SMEM ) )
-		bl2_abort();
+	if ( mc >= smem + BLIS_NUM_ELEM_SMEM )
+		bl2_check_error_code( BLIS_EXHAUSTED_STATIC_MEMORY_POOL );

 	++counter;

@@ -128,10 +146,9 @@ void bl2_free_s( void* p )

 void bl2_mm_clear_smem( void )
 {
-	dim_t n = N_ELEM_SMEM;
 	dim_t i;

-	for ( i = 0; i < n; ++i )
+	for ( i = 0; i < BLIS_NUM_ELEM_SMEM; ++i )
 	{
 		smem[i] = 0.0;
 	}
--- a/frame/include/bl2_error_macro_defs.h
+++ b/frame/include/bl2_error_macro_defs.h
@@ -38,7 +38,7 @@
 // -- Error-related macros --

 // Used to determine the size of the array of error strings.
-#define BLIS_MAX_NUM_ERR_MSGS    100
+#define BLIS_MAX_NUM_ERR_MSGS    200
 #define BLIS_MAX_ERR_MSG_LENGTH  200

 // Used to insert filenames and line numbers into error-checking code.
--- a/frame/include/bl2_type_defs.h
+++ b/frame/include/bl2_type_defs.h
@@ -461,7 +461,10 @@ typedef enum
 	// Packing-specific errors
 	BLIS_PACK_SCHEMA_NOT_SUPPORTED_FOR_UNPACK  = ( -90),

-	BLIS_ERROR_CODE_MAX                        = (-100)
+	// Memory allocator errors
+	BLIS_EXHAUSTED_STATIC_MEMORY_POOL          = (-100),
+
+	BLIS_ERROR_CODE_MAX                        = (-110)
 } err_t;

 #endif
--- a/2
+++ b/2
@@ -1 +1 @@
-0.0.1-4
+0.0.1-5
@@ -1 +1 @@
 .0.1-4
 .0.1-5