From 262a62e3482c5caa947a89cabb562b5887555bd6 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Fri, 8 Jun 2018 12:10:54 -0500 Subject: [PATCH 1/5] Fixed undefined ref in steamroller/excavator configs. Details: - Fixed erroneous calls to bli_cntx_init_piledriver_ref() in bli_cntx_init_steamroller() and bli_cntx_init_excavator(), which should have been to their respectively-named bli_cntx_init_*() functions instead. Thanks to qnerd for bringing these bugs to our attention. --- config/excavator/bli_cntx_init_excavator.c | 2 +- config/steamroller/bli_cntx_init_steamroller.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/excavator/bli_cntx_init_excavator.c b/config/excavator/bli_cntx_init_excavator.c index 065154d27..56d04ef4e 100644 --- a/config/excavator/bli_cntx_init_excavator.c +++ b/config/excavator/bli_cntx_init_excavator.c @@ -39,7 +39,7 @@ void bli_cntx_init_excavator( cntx_t* cntx ) blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. - bli_cntx_init_piledriver_ref( cntx ); + bli_cntx_init_excavator_ref( cntx ); // ------------------------------------------------------------------------- diff --git a/config/steamroller/bli_cntx_init_steamroller.c b/config/steamroller/bli_cntx_init_steamroller.c index b1409e4fc..1b6566c5c 100644 --- a/config/steamroller/bli_cntx_init_steamroller.c +++ b/config/steamroller/bli_cntx_init_steamroller.c @@ -39,7 +39,7 @@ void bli_cntx_init_steamroller( cntx_t* cntx ) blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. - bli_cntx_init_piledriver_ref( cntx ); + bli_cntx_init_steamroller_ref( cntx ); // ------------------------------------------------------------------------- From f1908d39767baef56077def69126d96f805ee27e Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Fri, 8 Jun 2018 14:22:22 -0500 Subject: [PATCH 2/5] Fixed broken input.operations.fast. 
Details: - Removed three input lines from input.operations.fast (labeled "test sequential micro-kernel") that I intended to remove in bd02c4e. These lines prevented 'make check' (and 'make checkblis-fast') from completing correctly. Note: This bug was fixed in 3df39b3, but that commit has not yet been merged into master, hence this redundant commit. Thanks to Robert van de Geijn for reporting this issue. --- testsuite/input.operations.fast | 3 --- 1 file changed, 3 deletions(-) diff --git a/testsuite/input.operations.fast b/testsuite/input.operations.fast index d86de6ecc..d2a44276e 100644 --- a/testsuite/input.operations.fast +++ b/testsuite/input.operations.fast @@ -260,15 +260,12 @@ # --- Level-3 micro-kernels ------------------------------------------------ 1 # gemm -1 # test sequential micro-kernel -1 # dimensions: k 1 # trsm -1 # test sequential micro-kernel ? # parameters: uploa 1 # gemmtrsm -1 # test sequential micro-kernel -1 # dimensions: k ? # parameters: uploa From 043d0cd37ef4a27b1901eeb89d40083cfb2a57ba Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Sat, 9 Jun 2018 13:46:49 -0500 Subject: [PATCH 3/5] Implemented bli_acquire_mpart(), added example code. Details: - Implemented bli_acquire_mpart(), a general-purpose submatrix view function that will alias an obj_t to be a submatrix "view" of an existing obj_t. - Renumbered examples in examples/oapi and inserted a new example file, 03obj_view.c, which shows how to use bli_acquire_mpart() to obtain submatrix views of existing objects, which can then be used to indirectly modify the parent object. 
--- examples/oapi/{0obj_basic.c => 00obj_basic.c} | 0 .../oapi/{1obj_attach.c => 01obj_attach.c} | 0 examples/oapi/{2obj_ij.c => 02obj_ij.c} | 0 examples/oapi/03obj_view.c | 272 ++++++++++++++++++ examples/oapi/{3level0.c => 04level0.c} | 0 examples/oapi/{4level1v.c => 05level1v.c} | 0 examples/oapi/{5level1m.c => 06level1m.c} | 0 .../{6level1m_diag.c => 07level1m_diag.c} | 0 examples/oapi/{7level2.c => 08level2.c} | 0 examples/oapi/{8level3.c => 09level3.c} | 0 examples/oapi/{9util.c => 10util.c} | 0 examples/oapi/Makefile | 21 +- examples/oapi/README | 2 +- frame/base/bli_part.c | 43 +++ frame/base/bli_part.h | 10 + 15 files changed, 337 insertions(+), 11 deletions(-) rename examples/oapi/{0obj_basic.c => 00obj_basic.c} (100%) rename examples/oapi/{1obj_attach.c => 01obj_attach.c} (100%) rename examples/oapi/{2obj_ij.c => 02obj_ij.c} (100%) create mode 100644 examples/oapi/03obj_view.c rename examples/oapi/{3level0.c => 04level0.c} (100%) rename examples/oapi/{4level1v.c => 05level1v.c} (100%) rename examples/oapi/{5level1m.c => 06level1m.c} (100%) rename examples/oapi/{6level1m_diag.c => 07level1m_diag.c} (100%) rename examples/oapi/{7level2.c => 08level2.c} (100%) rename examples/oapi/{8level3.c => 09level3.c} (100%) rename examples/oapi/{9util.c => 10util.c} (100%) diff --git a/examples/oapi/0obj_basic.c b/examples/oapi/00obj_basic.c similarity index 100% rename from examples/oapi/0obj_basic.c rename to examples/oapi/00obj_basic.c diff --git a/examples/oapi/1obj_attach.c b/examples/oapi/01obj_attach.c similarity index 100% rename from examples/oapi/1obj_attach.c rename to examples/oapi/01obj_attach.c diff --git a/examples/oapi/2obj_ij.c b/examples/oapi/02obj_ij.c similarity index 100% rename from examples/oapi/2obj_ij.c rename to examples/oapi/02obj_ij.c diff --git a/examples/oapi/03obj_view.c b/examples/oapi/03obj_view.c new file mode 100644 index 000000000..a3dd4b247 --- /dev/null +++ b/examples/oapi/03obj_view.c @@ -0,0 +1,272 @@ +/* + + BLIS + An 
object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include <stdio.h> +#include <stdlib.h> +#include "blis.h" + +void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); +void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); +void init_dobj_by_cols( obj_t* a ); +void init_zobj_by_cols( obj_t* a ); + +int main( int argc, char** argv ) +{ + obj_t a1, a2; + obj_t v1, v2, v3, v4, v5; + num_t dt; + dim_t m, n; + inc_t rs, cs; + dim_t i, j; + dim_t mv, nv; + + // + // This file demonstrates creating and using submatrix views into existing matrices. + // + + // + // Example 1: Create an object and then create a submatrix view. + // + + printf( "\n#\n# -- Example 1 --\n#\n\n" ); + + // We'll use these parameters for the following examples. + dt = BLIS_DOUBLE; + m = 6; n = 7; rs = 1; cs = m; + + // Create an object a1 using bli_obj_create(). + bli_obj_create( dt, m, n, rs, cs, &a1 ); + + // Initialize a1 to contain known values. + init_dobj_by_cols( &a1 ) + + bli_printm( "matrix 'a1' (initial state)", &a1, "%5.1f", "" ); + + // Acquire a 4x3 submatrix view into a1 at (i,j) offsets (1,2). + i = 1; j = 2; mv = 4; nv = 3; + bli_acquire_mpart( i, j, mv, nv, &a1, &v1 ); + + bli_printm( "4x3 submatrix 'v1' at offsets (1,2)", &v1, "%5.1f", "" ); + + // NOTE: Submatrix views should never be passed to bli_obj_free(). It + // will not cause an immediate error, but it is bad practice. Instead, + // you should only release the objects that were created directly via + // bli_obj_create(). In the above example, that means only object a1 + // would be passed to bli_obj_free(). + + // + // Example 2: Modify the contents of a submatrix view. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + // Modify the first three elements of the first column. + bli_setijm( -3.0, 0.0, 0, 0, &v1 ); + bli_setijm( -4.0, 0.0, 1, 0, &v1 ); + bli_setijm( -5.0, 0.0, 2, 0, &v1 ); + + // Modify the first three elements of the second column. 
+ bli_setijm( -6.0, 0.0, 0, 1, &v1 ); + bli_setijm( -7.0, 0.0, 1, 1, &v1 ); + bli_setijm( -8.0, 0.0, 2, 1, &v1 ); + + // Print the matrix again so we can see the updated elements. + bli_printm( "submatrix view 'v1' (modified state)", &v1, "%5.1f", "" ); + bli_printm( "matrix 'a1' (indirectly modified due to changes to 'v1')", &a1, "%5.1f", "" ); + + // + // Example 3: Create a submatrix view that is "too big". + // + + printf( "\n#\n# -- Example 3 --\n#\n\n" ); + + // bli_acquire_mpart() will safely truncate your requested submatrix + // view dimensions (or even the offsets) if they extend beyond the + // bounds of the parent object. + + bli_printm( "matrix 'a1' (current state)", &a1, "%5.1f", "" ); + + // Acquire a 4x3 submatrix view into a1 at offsets (4,2). Notice how + // the requested view contains four rows, but the view is created with + // only two rows because the starting m offset of 4 leaves only two rows + // left in the parent matrix. + bli_acquire_mpart( 4, 2, 4, 3, &a1, &v2 ); + + bli_printm( "4x3 submatrix 'v2' at offsets (4,2) -- two rows truncated for safety", &v2, "%5.1f", "" ); + + // + // Example 4: Create a bufferless object, attach an external buffer, and + // then create a submatrix view. + // + + printf( "\n#\n# -- Example 4 --\n#\n\n" ); + + // Create an object with known elements using the same approach as the + // previous example file. + double* p1 = malloc( m * n * sizeof( double ) ); + init_dmatrix_by_cols( m, n, p1, rs, cs ); + bli_obj_create_with_attached_buffer( dt, m, n, p1, rs, cs, &a2 ); + + bli_printm( "matrix 'a2' (initial state)", &a2, "%5.1f", "" ); + + // Acquire a 3x4 submatrix view at offset (2,3). + bli_acquire_mpart( 2, 3, 3, 4, &a2, &v3 ); + + bli_printm( "3x4 submatrix view 'v3' at offsets (2,3)", &v3, "%5.1f", "" ); + + // + // Example 5: Use a submatrix view to set a region of a larger matrix to + // zero. 
+ // + + printf( "\n#\n# -- Example 5 --\n#\n\n" ); + + bli_printm( "3x4 submatrix view 'v3' at offsets (2,3)", &v3, "%5.1f", "" ); + + bli_setm( &BLIS_ZERO, &v3 ); + + bli_printm( "3x4 submatrix view 'v3' (zeroed out)", &v3, "%5.1f", "" ); + + bli_printm( "matrix 'a2' (modified state)", &a2, "%5.1f", "" ); + + // + // Example 6: Obtain a submatrix view into a submatrix view. + // + + printf( "\n#\n# -- Example 6 --\n#\n\n" ); + + bli_acquire_mpart( 1, 1, 5, 6, &a2, &v4 ); + + bli_printm( "5x6 submatrix view 'v4' at offsets (1,1) of 'a2'", &v4, "%5.1f", "" ); + + bli_acquire_mpart( 1, 0, 4, 5, &v4, &v5 ); + + bli_printm( "4x5 submatrix view 'v5' at offsets (1,0) of 'v4'", &v5, "%5.1f", "" ); + + + // Free the memory arrays we allocated. + free( p1 ); + + // Free the objects we created. + bli_obj_free( &a1 ); + + return 0; +} + +// ----------------------------------------------------------------------------- + +void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) +{ + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by rows, assigning each element a unique + // value, starting at 0. + for ( i = 0; i < m; ++i ) + { + for ( j = 0; j < n; ++j ) + { + double* a_ij = a + i*rs + j*cs; + + *a_ij = alpha; + + alpha += 1.0; + } + } +} + +void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) +{ + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by columns, assigning each element a unique + // value, starting at 0. + for ( j = 0; j < n; ++j ) + { + for ( i = 0; i < m; ++i ) + { + double* a_ij = a + i*rs + j*cs; + + *a_ij = alpha; + + alpha += 1.0; + } + } +} + +void init_dobj_by_cols( obj_t* a ) +{ + dim_t m = bli_obj_length( a ); + dim_t n = bli_obj_width( a ); + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by columns, assigning each element a unique + // value, starting at 0. 
+ for ( j = 0; j < n; ++j ) + { + for ( i = 0; i < m; ++i ) + { + bli_setijm( alpha, 0.0, i, j, a ); + + alpha += 1.0; + } + } +} + +void init_zobj_by_cols( obj_t* a ) +{ + dim_t m = bli_obj_length( a ); + dim_t n = bli_obj_width( a ); + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by columns, assigning each real and imaginary + // element a unique value, starting at 0. + for ( j = 0; j < n; ++j ) + { + for ( i = 0; i < m; ++i ) + { + bli_setijm( alpha, alpha + 1.0, i, j, a ); + + alpha += 2.0; + } + } +} + diff --git a/examples/oapi/3level0.c b/examples/oapi/04level0.c similarity index 100% rename from examples/oapi/3level0.c rename to examples/oapi/04level0.c diff --git a/examples/oapi/4level1v.c b/examples/oapi/05level1v.c similarity index 100% rename from examples/oapi/4level1v.c rename to examples/oapi/05level1v.c diff --git a/examples/oapi/5level1m.c b/examples/oapi/06level1m.c similarity index 100% rename from examples/oapi/5level1m.c rename to examples/oapi/06level1m.c diff --git a/examples/oapi/6level1m_diag.c b/examples/oapi/07level1m_diag.c similarity index 100% rename from examples/oapi/6level1m_diag.c rename to examples/oapi/07level1m_diag.c diff --git a/examples/oapi/7level2.c b/examples/oapi/08level2.c similarity index 100% rename from examples/oapi/7level2.c rename to examples/oapi/08level2.c diff --git a/examples/oapi/8level3.c b/examples/oapi/09level3.c similarity index 100% rename from examples/oapi/8level3.c rename to examples/oapi/09level3.c diff --git a/examples/oapi/9util.c b/examples/oapi/10util.c similarity index 100% rename from examples/oapi/9util.c rename to examples/oapi/10util.c diff --git a/examples/oapi/Makefile b/examples/oapi/Makefile index 905ef6727..08964e479 100644 --- a/examples/oapi/Makefile +++ b/examples/oapi/Makefile @@ -105,16 +105,17 @@ CFLAGS += -I$(TEST_SRC_PATH) LIBBLIS_LINK := $(BUILD_PATH)/$(LIBBLIS_LINK) # Binary executable name. 
-TEST_BINS := 0obj_basic.x \ - 1obj_attach.x \ - 2obj_ij.x \ - 3level0.x \ - 4level1v.x \ - 5level1m.x \ - 6level1m_diag.x \ - 7level2.x \ - 8level3.x \ - 9util.x +TEST_BINS := 00obj_basic.x \ + 01obj_attach.x \ + 02obj_ij.x \ + 03obj_view.x \ + 04level0.x \ + 05level1v.x \ + 06level1m.x \ + 07level1m_diag.x \ + 08level2.x \ + 09level3.x \ + 10util.x diff --git a/examples/oapi/README b/examples/oapi/README index 28cc6d84e..adf7ded9d 100644 --- a/examples/oapi/README +++ b/examples/oapi/README @@ -6,7 +6,7 @@ This directory contains several files, each containing various pieces of example code that demonstrate core functionality of the object API in BLIS. These example files should be thought of collectively like a tutorial, and therefore it is recommended to start from the beginning (the file that -starts in '0'). +starts in '00'). You can build all of the examples by simply running 'make' from this directory. (You can also run 'make clean'.) The makefile assumes that diff --git a/frame/base/bli_part.c b/frame/base/bli_part.c index 47fa4fdfd..d05eabb79 100644 --- a/frame/base/bli_part.c +++ b/frame/base/bli_part.c @@ -38,6 +38,49 @@ // -- Matrix partitioning ------------------------------------------------------ +void bli_acquire_mpart + ( + dim_t i, + dim_t j, + dim_t bm, + dim_t bn, + obj_t* parent, + obj_t* child + ) +{ + // Query the dimensions of the parent object. + const dim_t m_par = bli_obj_length( parent ); + const dim_t n_par = bli_obj_width( parent ); + + // If either i or j is already beyond what exists of the parent matrix, + // slide them back to the outer dimensions. (What will happen in this + // scenario is that bm and/or bn will be reduced to zero so that the + // child matrix does not refer to anything beyond the bounds of the + // parent.) (Note: This is a safety measure and generally should never + // be needed if the caller is passing in sane arguments.) 
+ if ( i > m_par ) i = m_par; + if ( j > n_par ) j = n_par; + + // If either bm or bn spills out over the edge of the parent matrix, + // reduce them so that the child matrix fits within the bounds of the + // parent. (Note: This is a safety measure and generally should never + // be needed if the caller is passing in sane arguments, though this + // code is somewhat more likely to be needed than the code above.) + if ( bm > m_par - i ) bm = m_par - i; + if ( bn > n_par - j ) bn = n_par - j; + + // Alias the parent object's contents into the child object. + bli_obj_alias_to( parent, child ); + + // Set the offsets and dimensions of the child object. Note that we + // increment, rather than overwrite, the offsets of the child object + // in case the parent object already had non-zero offsets (usually + // because the parent was itself a child of a larger grandparent object). + bli_obj_inc_offs( i, j, child ); + bli_obj_set_dims( bm, bn, child ); +} + + void bli_acquire_mpart_mdim ( dir_t direct, diff --git a/frame/base/bli_part.h b/frame/base/bli_part.h index fd24f1d82..284a87ffa 100644 --- a/frame/base/bli_part.h +++ b/frame/base/bli_part.h @@ -36,6 +36,16 @@ // -- Matrix partitioning ------------------------------------------------------ +void bli_acquire_mpart + ( + dim_t i, + dim_t j, + dim_t m, + dim_t n, + obj_t* obj, + obj_t* sub_obj + ); + #undef GENPROT #define GENPROT( opname ) \ \ From 712de9b371a8727682352a2f52cd4880de905f0b Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Sat, 9 Jun 2018 14:36:30 -0500 Subject: [PATCH 4/5] Added missing semicolon in 03obj_view.c Details: - Thanks to Tony Skjellum for pointing out this typo due to a last-minute change to the source prior to committing. 
--- examples/oapi/03obj_view.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/oapi/03obj_view.c b/examples/oapi/03obj_view.c index a3dd4b247..68c9c922f 100644 --- a/examples/oapi/03obj_view.c +++ b/examples/oapi/03obj_view.c @@ -69,7 +69,7 @@ int main( int argc, char** argv ) bli_obj_create( dt, m, n, rs, cs, &a1 ); // Initialize a1 to contain known values. - init_dobj_by_cols( &a1 ) + init_dobj_by_cols( &a1 ); bli_printm( "matrix 'a1' (initial state)", &a1, "%5.1f", "" ); From 2610fff0b07bdb345cb2e334ef6bea0c63c8cead Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Mon, 11 Jun 2018 12:32:54 -0500 Subject: [PATCH 5/5] Renamed 1m packm kernels from _1e to _1er. Details: - Renamed the reference packm kernels used by 1m. Previously, they used a _1e suffix, which was confusing since they packed to both 1e and 1r schemas. This was likely an artifact of the time when there were separate kernels for each schema before I decided to combine them into a single function (per datatype and panel dimension), and the 1e functions were the ones to inherit the 1r functionality. The kernels have now been renamed to use a _1er suffix. 
--- ref_kernels/1m/bli_packm_cxk_1er_ref.c | 18 +++++++++--------- ref_kernels/bli_cntx_ref.c | 18 +++++++++--------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/ref_kernels/1m/bli_packm_cxk_1er_ref.c b/ref_kernels/1m/bli_packm_cxk_1er_ref.c index 3c526506d..693fd3c47 100644 --- a/ref_kernels/1m/bli_packm_cxk_1er_ref.c +++ b/ref_kernels/1m/bli_packm_cxk_1er_ref.c @@ -189,7 +189,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_2xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_2xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -364,7 +364,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_4xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_4xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -555,7 +555,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_6xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_6xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -762,7 +762,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_8xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_8xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -985,7 +985,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_10xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_10xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -1224,7 +1224,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_12xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_12xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -1479,7 +1479,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_14xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_14xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -1750,7 +1750,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } 
-INSERT_GENTFUNCCO_BASIC2( packm_16xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_16xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -2133,5 +2133,5 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_30xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_30xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) diff --git a/ref_kernels/bli_cntx_ref.c b/ref_kernels/bli_cntx_ref.c index cc5828f4d..81df4f9c4 100644 --- a/ref_kernels/bli_cntx_ref.c +++ b/ref_kernels/bli_cntx_ref.c @@ -221,23 +221,23 @@ #define packm_30xk_rih_ker_name GENARNAME(packm_30xk_rih) #undef packm_2xk_1er_ker_name -#define packm_2xk_1er_ker_name GENARNAME(packm_2xk_1e) +#define packm_2xk_1er_ker_name GENARNAME(packm_2xk_1er) #undef packm_4xk_1er_ker_name -#define packm_4xk_1er_ker_name GENARNAME(packm_4xk_1e) +#define packm_4xk_1er_ker_name GENARNAME(packm_4xk_1er) #undef packm_6xk_1er_ker_name -#define packm_6xk_1er_ker_name GENARNAME(packm_6xk_1e) +#define packm_6xk_1er_ker_name GENARNAME(packm_6xk_1er) #undef packm_8xk_1er_ker_name -#define packm_8xk_1er_ker_name GENARNAME(packm_8xk_1e) +#define packm_8xk_1er_ker_name GENARNAME(packm_8xk_1er) #undef packm_10xk_1er_ker_name -#define packm_10xk_1er_ker_name GENARNAME(packm_10xk_1e) +#define packm_10xk_1er_ker_name GENARNAME(packm_10xk_1er) #undef packm_12xk_1er_ker_name -#define packm_12xk_1er_ker_name GENARNAME(packm_12xk_1e) +#define packm_12xk_1er_ker_name GENARNAME(packm_12xk_1er) #undef packm_14xk_1er_ker_name -#define packm_14xk_1er_ker_name GENARNAME(packm_14xk_1e) +#define packm_14xk_1er_ker_name GENARNAME(packm_14xk_1er) #undef packm_16xk_1er_ker_name -#define packm_16xk_1er_ker_name GENARNAME(packm_16xk_1e) +#define packm_16xk_1er_ker_name GENARNAME(packm_16xk_1er) #undef packm_30xk_1er_ker_name -#define packm_30xk_1er_ker_name GENARNAME(packm_30xk_1e) +#define packm_30xk_1er_ker_name GENARNAME(packm_30xk_1er) // Include the level-1m kernel API template. 
#include "bli_l1m_ker.h"